コード例 #1
0
ファイル: builders.py プロジェクト: TuSun12379/topas_tools
class miller_array_builder(crystal_symmetry_builder):
    """Build Miller arrays from the reflection loops of a CIF block.

    Every loop holding ``index_h``/``index_k``/``index_l`` columns is scanned
    and each remaining column is converted to a Miller array stored in
    ``self._arrays`` under a key derived from the column label.  Columns that
    belong together (sigmas, PHWT phases, Hendrickson-Lattman coefficients,
    A/B parts, phases and ``_plus``/``_minus`` pairs) are merged into one
    array when their partner column is found.
    """

    observation_types = {
        '_refln_F_squared': xray.intensity(),
        '_refln_intensity': xray.intensity(),
        '_refln_F': xray.amplitude(),
        '_refln_A': None,
    }

    @staticmethod
    def _rstrip_substrings(string, substrings):
        """Strip each of *substrings*, in order, from the end of *string*.

        Empty substrings are skipped; every suffix is tried exactly once.
        """
        for substr in substrings:
            if substr == '': continue
            if string.endswith(substr):
                string = string[:-len(substr)]
        return string

    def __init__(self, cif_block, base_array_info=None):
        """Extract all Miller arrays found in *cif_block*.

        Args:
            cif_block: CIF block whose loops are searched for Miller indices.
            base_array_info: optional array info used as the template for the
                info attached to every array.  When given, its
                ``crystal_symmetry_from_file`` is joined into the crystal
                symmetry and its labels (if not None) are prepended to each
                array's labels.  Defaults to
                ``miller.array_info(source_type="cif")``.

        Raises:
            CifBuilderError: if no array could be built from the block.
        """
        crystal_symmetry_builder.__init__(self, cif_block)
        if base_array_info is not None:
            self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
                other_symmetry=base_array_info.crystal_symmetry_from_file,
                force=True)
        self._arrays = OrderedDict()
        if base_array_info is None:
            base_array_info = miller.array_info(source_type="cif")
        refln_containing_loops = self.get_miller_indices_containing_loops()
        for self.indices, refln_loop in refln_containing_loops:
            self.wavelength_id_array = None
            self.crystal_id_array = None
            self.scale_group_array = None
            # [None] means "no partitioning" along that id column.
            wavelength_ids = [None]
            crystal_ids = [None]
            scale_groups = [None]
            for key, value in refln_loop.items():
                # need to get these arrays first
                if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                        or key.endswith('scale_group_code')):
                    data = as_int_or_none_if_all_question_marks(
                        value, column_name=key)
                    if data is None: continue
                    counts = data.counts()
                    # a single distinct id does not partition the data
                    if len(counts) == 1: continue
                    array = miller.array(
                        miller.set(self.crystal_symmetry,
                                   self.indices).auto_anomalous(), data)
                    if key.endswith('wavelength_id'):
                        self.wavelength_id_array = array
                        wavelength_ids = counts.keys()
                    elif key.endswith('crystal_id'):
                        self.crystal_id_array = array
                        crystal_ids = counts.keys()
                    elif key.endswith('scale_group_code'):
                        self.scale_group_array = array
                        scale_groups = counts.keys()
            for label, value in sorted(refln_loop.items()):
                for w_id in wavelength_ids:
                    for crys_id in crystal_ids:
                        for scale_group in scale_groups:
                            if 'index_' in label: continue
                            key = label
                            labels = [label]
                            # the id columns themselves are never partitioned
                            if (key.endswith('wavelength_id')
                                    or key.endswith('crystal_id')
                                    or key.endswith('scale_group_code')):
                                w_id = None
                                crys_id = None
                                scale_group = None
                            key_suffix = ''
                            if w_id is not None:
                                key_suffix += '_%i' % w_id
                                labels.insert(0, "wavelength_id=%i" % w_id)
                            if crys_id is not None:
                                key_suffix += '_%i' % crys_id
                                labels.insert(0, "crystal_id=%i" % crys_id)
                            if scale_group is not None:
                                key_suffix += '_%i' % scale_group
                                labels.insert(
                                    0, "scale_group_code=%i" % scale_group)
                            key += key_suffix
                            if key in self._arrays: continue
                            array = self.flex_std_string_as_miller_array(
                                value,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            if array is None: continue
                            if '_sigma' in key:
                                # attach sigmas to the matching data array if
                                # that array is already stored without sigmas
                                sigmas_label = label
                                key = None
                                for suffix in ('', '_meas', '_calc'):
                                    if sigmas_label.replace(
                                            '_sigma', suffix) in refln_loop:
                                        key = sigmas_label.replace(
                                            '_sigma', suffix) + key_suffix
                                        break
                                if key is None:
                                    key = sigmas_label + key_suffix
                                elif key in self._arrays and self._arrays[
                                        key].sigmas() is None:
                                    sigmas = array
                                    array = self._arrays[key]
                                    check_array_sizes(array, sigmas, key,
                                                      sigmas_label)
                                    sigmas = as_flex_double(
                                        sigmas, sigmas_label)
                                    array.set_sigmas(sigmas.data())
                                    info = array.info()
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels +
                                            [sigmas_label]))
                                    continue
                            elif 'PHWT' in key:
                                # combine PHWT phases with the FWT array
                                phwt_label = label
                                fwt_label = label.replace('PHWT', 'FWT')
                                if fwt_label not in refln_loop: continue
                                phwt_array = array
                                if fwt_label in self._arrays:
                                    array = self._arrays[fwt_label]
                                    check_array_sizes(array, phwt_array,
                                                      fwt_label, phwt_label)
                                    phases = as_flex_double(
                                        phwt_array, phwt_label)
                                    info = array.info()
                                    array = array.phase_transfer(phases,
                                                                 deg=True)
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels + [phwt_label]))
                                    self._arrays[fwt_label] = array
                                    continue
                            elif 'HL_' in key:
                                # the four HL_A..HL_D columns are stored as one
                                # array under the HL_A key
                                hl_letter = key[key.find('HL_') + 3]
                                hl_key = 'HL_' + hl_letter
                                key = key.replace(hl_key, 'HL_A')
                                if key in self._arrays:
                                    continue  # this array is already dealt with
                                hl_labels = [
                                    label.replace(hl_key, 'HL_' + letter)
                                    for letter in 'ABCD'
                                ]
                                hl_values = [
                                    cif_block.get(hl_label)
                                    for hl_label in hl_labels
                                ]
                                if hl_values.count(None) == 0:
                                    selection = self.get_selection(
                                        hl_values[0],
                                        wavelength_id=w_id,
                                        crystal_id=crys_id,
                                        scale_group_code=scale_group)
                                    hl_values = [
                                        as_double_or_none_if_all_question_marks(
                                            hl.select(selection),
                                            column_name=lab)
                                        for hl, lab in zip(
                                            hl_values, hl_labels)
                                    ]
                                    array = miller.array(
                                        miller.set(
                                            self.crystal_symmetry,
                                            self.indices.select(
                                                selection)).auto_anomalous(),
                                        flex.hendrickson_lattman(*hl_values))
                                    labels = labels[:-1] + hl_labels
                            elif '.B_' in key or '_B_' in key:
                                # a B column becomes the imaginary part of the
                                # already stored A array
                                if '.B_' in key:
                                    key, key_b = key.replace('.B_', '.A_'), key
                                    label = label.replace('.B_', '.A_')
                                elif '_B_' in key:
                                    key, key_b = key.replace('_B', '_A'), key
                                    label = label.replace('_B', '_A')
                                if key in refln_loop and key_b in refln_loop:
                                    b_part = array.data()
                                    if key in self._arrays:
                                        info = self._arrays[key].info()
                                        a_part = self._arrays[key].data()
                                        self._arrays[key] = self._arrays[
                                            key].array(
                                                data=flex.complex_double(
                                                    a_part, b_part))
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels + [key_b]))
                                        continue
                            elif ('phase_' in key and not key.endswith('_meas')
                                  and self.crystal_symmetry.space_group()
                                  is not None):
                                # pair a phase column with its F_ (or F_..._au)
                                # amplitude column
                                alt_key1 = label.replace('phase_', 'F_')
                                alt_key2 = alt_key1 + '_au'
                                if alt_key1 in refln_loop:
                                    phase_key = label
                                    key = alt_key1 + key_suffix
                                elif alt_key2 in refln_loop:
                                    phase_key = label
                                    key = alt_key2 + key_suffix
                                else:
                                    phase_key = None
                                if phase_key is not None:
                                    phases = array.data()
                                    if key in self._arrays:
                                        array = self._arrays[key]
                                        array = as_flex_double(array, key)
                                        check_array_sizes(
                                            array, phases, key, phase_key)
                                        info = self._arrays[key].info()
                                        self._arrays[
                                            key] = array.phase_transfer(
                                                phases, deg=True)
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels +
                                                [phase_key]))
                                    else:
                                        array = self.flex_std_string_as_miller_array(
                                            refln_loop[label],
                                            wavelength_id=w_id,
                                            crystal_id=crys_id,
                                            scale_group_code=scale_group)
                                        check_array_sizes(
                                            array, phases, key, phase_key)
                                        # NOTE(review): the result of
                                        # phase_transfer() is discarded here,
                                        # unlike in the branch above - confirm
                                        # whether that is intended.
                                        array.phase_transfer(phases, deg=True)
                                        labels = labels + [label, phase_key]
                            if base_array_info.labels is not None:
                                labels = base_array_info.labels + labels

                            # determine observation type
                            stripped_key = self._rstrip_substrings(
                                key, [
                                    key_suffix, '_au', '_meas', '_calc',
                                    '_plus', '_minus'
                                ])
                            if (stripped_key.endswith('F_squared')
                                    or stripped_key.endswith('intensity')
                                    or stripped_key.endswith('.I')
                                    or stripped_key.endswith('_I')) and (
                                        array.is_real_array()
                                        or array.is_integer_array()):
                                array.set_observation_type_xray_intensity()
                            elif (stripped_key.endswith('F')
                                  and (array.is_real_array()
                                       or array.is_integer_array())):
                                array.set_observation_type_xray_amplitude()
                            if (array.is_xray_intensity_array()
                                    or array.is_xray_amplitude_array()):
                                # e.g. merge_equivalents treats integer arrays differently, so must
                                # convert integer observation arrays here to be safe
                                if isinstance(array.data(), flex.int):
                                    array = array.customized_copy(
                                        data=array.data().as_double())
                            array.set_info(
                                base_array_info.customized_copy(labels=labels))
                            self._arrays.setdefault(key, array)
        # Merge matching *_plus / *_minus arrays into one anomalous array.
        # Iterate over a snapshot of the keys because entries are popped below.
        for key in list(self._arrays.keys()):
            if (key.endswith('_minus') or '_minus_' in key
                    or key.endswith('_plus') or '_plus_' in key):
                if '_minus' in key:
                    minus_key = key
                    plus_key = key.replace('_minus', '_plus')
                elif '_plus' in key:
                    plus_key = key
                    minus_key = key.replace('_plus', '_minus')
                if plus_key in self._arrays and minus_key in self._arrays:
                    plus_array = self._arrays.pop(plus_key)
                    minus_array = self._arrays.pop(minus_key)
                    # the minus array is stored with negated indices
                    minus_array = minus_array.customized_copy(
                        indices=-minus_array.indices()).set_info(
                            minus_array.info())
                    array = plus_array.concatenate(
                        minus_array, assert_is_similar_symmetry=False)
                    array = array.customized_copy(anomalous_flag=True)
                    array.set_info(
                        minus_array.info().customized_copy(labels=list(
                            OrderedSet(plus_array.info().labels +
                                       minus_array.info().labels))))
                    array.set_observation_type(plus_array.observation_type())
                    self._arrays.setdefault(key, array)

        if not self._arrays:
            raise CifBuilderError("No reflection data present in cif block")

    def get_miller_indices_containing_loops(self):
        """Return ``(indices, loop)`` pairs for loops holding Miller indices.

        For each loop of ``self.cif_block`` the first key containing
        ``'index_h'`` selects the h, k and l columns; they are converted to
        ``flex.int`` and combined into a ``flex.miller_index`` array.

        Raises:
            CifBuilderError: if one of the three index columns is missing or
                an index value cannot be converted to an integer.
        """
        loops = []
        for loop in self.cif_block.loops.values():
            for key in loop.keys():
                if 'index_h' not in key: continue
                hkl_str = [
                    loop.get(key.replace('index_h', 'index_%s' % i))
                    for i in 'hkl'
                ]
                if hkl_str.count(None) > 0:
                    raise CifBuilderError(
                        "Miller indices missing from current CIF block (%s)" %
                        key.replace('index_h',
                                    'index_%s' % 'hkl'[hkl_str.index(None)]))
                hkl_int = []
                for i, h_str in enumerate(hkl_str):
                    try:
                        h_int = flex.int(h_str)
                    except ValueError as e:
                        raise CifBuilderError(
                            "Invalid item for Miller index %s: %s" %
                            ("HKL"[i], str(e)))
                    hkl_int.append(h_int)
                indices = flex.miller_index(*hkl_int)
                loops.append((indices, loop))
                # only one set of index columns is used per loop
                break
        return loops
コード例 #2
0
ファイル: builders.py プロジェクト: cctbx/cctbx-playground
class miller_array_builder(crystal_symmetry_builder):
  """Build Miller arrays from the reflection loops of a CIF block.

  Every loop holding ``index_h``/``index_k``/``index_l`` columns is scanned
  and each remaining column is converted to a Miller array stored in
  ``self._arrays`` under a key derived from the column label.  Columns that
  belong together (sigmas, PHWT phases, Hendrickson-Lattman coefficients,
  A/B parts, phases and ``_plus``/``_minus`` pairs) are merged into one
  array when their partner column is found.
  """

  observation_types = {
    '_refln_F_squared': xray.intensity(),
    '_refln_intensity': xray.intensity(),
    '_refln_F': xray.amplitude(),
    '_refln_A': None,
  }

  @staticmethod
  def _rstrip_substrings(string, substrings):
    """Strip each of *substrings*, in order, from the end of *string*.

    Empty substrings are skipped; every suffix is tried exactly once.
    """
    for substr in substrings:
      if substr == '': continue
      if string.endswith(substr):
        string = string[:-len(substr)]
    return string

  def __init__(self, cif_block, base_array_info=None, wavelengths=None):
    """Extract all Miller arrays found in *cif_block*.

    Args:
      cif_block: CIF block whose loops are searched for Miller indices.
      base_array_info: optional array info used as the template for the info
        attached to every array.  When given, its
        ``crystal_symmetry_from_file`` is joined into the crystal symmetry
        and its labels (if not None) are prepended to each array's labels.
        Defaults to ``miller.array_info(source_type="cif")``.
      wavelengths: optional mapping looked up by wavelength id; the value
        found is stored in the info of intensity and amplitude arrays.
        Defaults to an empty dict.

    Raises:
      CifBuilderError: if no array could be built from the block.
    """
    crystal_symmetry_builder.__init__(self, cif_block)
    if base_array_info is not None:
      self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
        other_symmetry=base_array_info.crystal_symmetry_from_file,
      force=True)
    self._arrays = OrderedDict()
    if wavelengths is None:
      wavelengths = {}
    if base_array_info is None:
      base_array_info = miller.array_info(source_type="cif")
    refln_containing_loops = self.get_miller_indices_containing_loops()
    for self.indices, refln_loop in refln_containing_loops:
      self.wavelength_id_array = None
      self.crystal_id_array = None
      self.scale_group_array = None
      # [None] means "no partitioning" along that id column.
      wavelength_ids = [None]
      crystal_ids = [None]
      scale_groups = [None]
      for key, value in refln_loop.items():
        # need to get these arrays first
        if (key.endswith('wavelength_id') or
            key.endswith('crystal_id') or
            key.endswith('scale_group_code')):
          data = as_int_or_none_if_all_question_marks(value, column_name=key)
          if data is None:
            continue
          counts = data.counts()
          # wavelength ids are recorded even when there is only one, so the
          # wavelength lookup below still happens for single-wavelength data
          if key.endswith('wavelength_id'):
            wavelength_ids = counts.keys()
          # a single distinct id does not partition the data
          if len(counts) == 1: continue
          array = miller.array(
            miller.set(self.crystal_symmetry, self.indices).auto_anomalous(), data)
          if key.endswith('wavelength_id'):
            self.wavelength_id_array = array
            wavelength_ids = counts.keys()
          elif key.endswith('crystal_id'):
            self.crystal_id_array = array
            crystal_ids = counts.keys()
          elif key.endswith('scale_group_code'):
            self.scale_group_array = array
            scale_groups = counts.keys()
      for label, value in sorted(refln_loop.items()):
        for w_id in wavelength_ids:
          for crys_id in crystal_ids:
            for scale_group in scale_groups:
              if 'index_' in label: continue
              key = label
              labels = [label]
              wavelength = None
              # the id columns themselves are never partitioned
              if (key.endswith('wavelength_id') or
                    key.endswith('crystal_id') or
                    key.endswith('scale_group_code')):
                w_id = None
                crys_id = None
                scale_group = None
              key_suffix = ''
              if w_id is not None:
                key_suffix += '_%i' %w_id
                labels.insert(0, "wavelength_id=%i" %w_id)
                wavelength = wavelengths.get(w_id, None)
              if crys_id is not None:
                key_suffix += '_%i' %crys_id
                labels.insert(0, "crystal_id=%i" %crys_id)
              if scale_group is not None:
                key_suffix += '_%i' %scale_group
                labels.insert(0, "scale_group_code=%i" %scale_group)
              key += key_suffix
              if key in self._arrays: continue
              array = self.flex_std_string_as_miller_array(
                value, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              if array is None: continue
              if '_sigma' in key:
                # attach sigmas to the matching data array if that array is
                # already stored without sigmas
                sigmas_label = label
                key = None
                for suffix in ('', '_meas', '_calc'):
                  if sigmas_label.replace('_sigma', suffix) in refln_loop:
                    key = sigmas_label.replace('_sigma', suffix) + key_suffix
                    break
                if key is None:
                  key = sigmas_label + key_suffix
                elif key in self._arrays and self._arrays[key].sigmas() is None:
                  sigmas = array
                  array = self._arrays[key]
                  check_array_sizes(array, sigmas, key, sigmas_label)
                  sigmas = as_flex_double(sigmas, sigmas_label)
                  array.set_sigmas(sigmas.data())
                  info = array.info()
                  array.set_info(
                    info.customized_copy(labels=info.labels+[sigmas_label],
                      wavelength=wavelength))
                  continue
              elif 'PHWT' in key:
                # combine PHWT phases with the FWT array
                phwt_label = label
                fwt_label = label.replace('PHWT', 'FWT')
                if fwt_label not in refln_loop: continue
                phwt_array = array
                if fwt_label in self._arrays:
                  array = self._arrays[fwt_label]
                  check_array_sizes(array, phwt_array, fwt_label, phwt_label)
                  phases = as_flex_double(phwt_array, phwt_label)
                  info = array.info()
                  array = array.phase_transfer(phases, deg=True)
                  array.set_info(
                    info.customized_copy(labels=info.labels+[phwt_label]))
                  self._arrays[fwt_label] = array
                  continue
              elif 'HL_' in key:
                # the four HL_A..HL_D columns are stored as one array under
                # the HL_A key
                hl_letter = key[key.find('HL_')+3]
                hl_key = 'HL_' + hl_letter
                key = key.replace(hl_key, 'HL_A')
                if key in self._arrays:
                  continue # this array is already dealt with
                hl_labels = [label.replace(hl_key, 'HL_'+letter) for letter in 'ABCD']
                hl_values = [cif_block.get(hl_label) for hl_label in hl_labels]
                if hl_values.count(None) == 0:
                  selection = self.get_selection(
                    hl_values[0], wavelength_id=w_id,
                    crystal_id=crys_id, scale_group_code=scale_group)
                  hl_values = [as_double_or_none_if_all_question_marks(
                    hl.select(selection), column_name=lab)
                               for hl, lab in zip(hl_values, hl_labels)]
                  array = miller.array(miller.set(
                    self.crystal_symmetry, self.indices.select(selection)
                    ).auto_anomalous(), flex.hendrickson_lattman(*hl_values))
                  labels = labels[:-1]+hl_labels
              elif '.B_' in key or '_B_' in key:
                # a B column becomes the imaginary part of the already stored
                # A array
                if '.B_' in key:
                  key, key_b = key.replace('.B_', '.A_'), key
                  label = label.replace('.B_', '.A_')
                elif '_B_' in key:
                  key, key_b = key.replace('_B', '_A'), key
                  label = label.replace('_B', '_A')
                if key in refln_loop and key_b in refln_loop:
                  b_part = array.data()
                  if key in self._arrays:
                    info = self._arrays[key].info()
                    a_part = self._arrays[key].data()
                    self._arrays[key] = self._arrays[key].array(
                      data=flex.complex_double(a_part, b_part))
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[key_b]))
                    continue
              elif ('phase_' in key and not "_meas" in key and
                    self.crystal_symmetry.space_group() is not None):
                # pair a phase column with its F_ (or F_..._au) amplitude column
                alt_key1 = label.replace('phase_', 'F_')
                alt_key2 = alt_key1 + '_au'
                if alt_key1 in refln_loop:
                  phase_key = label
                  key = alt_key1+key_suffix
                elif alt_key2 in refln_loop:
                  phase_key = label
                  key = alt_key2+key_suffix
                else: phase_key = None
                if phase_key is not None:
                  phases = array.data()
                  if key in self._arrays:
                    array = self._arrays[key]
                    array = as_flex_double(array, key)
                    check_array_sizes(array, phases, key, phase_key)
                    info = self._arrays[key].info()
                    self._arrays[key] = array.phase_transfer(phases, deg=True)
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[phase_key]))
                  else:
                    array = self.flex_std_string_as_miller_array(
                      refln_loop[label], wavelength_id=w_id, crystal_id=crys_id,
                      scale_group_code=scale_group)
                    check_array_sizes(array, phases, key, phase_key)
                    # NOTE(review): the result of phase_transfer() is
                    # discarded here, unlike in the branch above - confirm
                    # whether that is intended.
                    array.phase_transfer(phases, deg=True)
                    labels = labels+[label, phase_key]
              if base_array_info.labels is not None:
                labels = base_array_info.labels + labels
              # determine observation type
              stripped_key = self._rstrip_substrings(
                key, [key_suffix, '_au', '_meas', '_calc', '_plus', '_minus'])
              if (stripped_key.endswith('F_squared') or
                  stripped_key.endswith('intensity') or
                  stripped_key.endswith('.I') or
                  stripped_key.endswith('_I')) and (
                    array.is_real_array() or array.is_integer_array()):
                array.set_observation_type_xray_intensity()
              elif (stripped_key.endswith('F') and (
                array.is_real_array() or array.is_integer_array())):
                array.set_observation_type_xray_amplitude()
              is_observation = (array.is_xray_intensity_array() or
                                array.is_xray_amplitude_array())
              if is_observation:
                # e.g. merge_equivalents treats integer arrays differently, so must
                # convert integer observation arrays here to be safe
                if isinstance(array.data(), flex.int):
                  array = array.customized_copy(data=array.data().as_double())
              array.set_info(base_array_info.customized_copy(labels=labels))
              if (array.is_xray_intensity_array() or
                  array.is_xray_amplitude_array()):
                # record the wavelength on observed data arrays
                info = array.info()
                array.set_info(info.customized_copy(wavelength=wavelength))
              self._arrays.setdefault(key, array)
    # Merge matching *_plus / *_minus arrays into one anomalous array.
    # Iterate over a snapshot of the keys because entries are popped below.
    for key in list(self._arrays.keys()):
      if (   key.endswith('_minus') or '_minus_' in key
          or key.endswith('_plus') or '_plus_' in key):
        if '_minus' in key:
          minus_key = key
          plus_key = key.replace('_minus', '_plus')
        elif '_plus' in key:
          plus_key = key
          minus_key = key.replace('_plus', '_minus')
        if plus_key in self._arrays and minus_key in self._arrays:
          plus_array = self._arrays.pop(plus_key)
          minus_array = self._arrays.pop(minus_key)
          # the minus array is stored with negated indices
          minus_array = minus_array.customized_copy(
            indices=-minus_array.indices()).set_info(minus_array.info())
          array = plus_array.concatenate(
            minus_array, assert_is_similar_symmetry=False)
          array = array.customized_copy(anomalous_flag=True)
          array.set_info(minus_array.info().customized_copy(
            labels=list(
              OrderedSet(plus_array.info().labels+minus_array.info().labels))))
          array.set_observation_type(plus_array.observation_type())
          self._arrays.setdefault(key, array)

    if not self._arrays:
      raise CifBuilderError("No reflection data present in cif block")

  def get_miller_indices_containing_loops(self):
    """Return ``(indices, loop)`` pairs for loops holding Miller indices.

    For each loop of ``self.cif_block`` the first key containing
    ``'index_h'`` selects the h, k and l columns; they are converted to
    ``flex.int`` and combined into a ``flex.miller_index`` array.

    Raises:
      CifBuilderError: if one of the three index columns is missing or an
        index value cannot be converted to an integer.
    """
    loops = []
    for loop in self.cif_block.loops.values():
      for key in loop.keys():
        if 'index_h' not in key: continue
        hkl_str = [loop.get(key.replace('index_h', 'index_%s' %i)) for i in 'hkl']
        if hkl_str.count(None) > 0:
          raise CifBuilderError(
            "Miller indices missing from current CIF block (%s)"
            %key.replace('index_h', 'index_%s' %'hkl'[hkl_str.index(None)]))
        hkl_int = []
        for i,h_str in enumerate(hkl_str):
          try:
            h_int = flex.int(h_str)
          except ValueError as e:
            raise CifBuilderError(
              "Invalid item for Miller index %s: %s" % ("HKL"[i], str(e)))
          hkl_int.append(h_int)
        indices = flex.miller_index(*hkl_int)
        loops.append((indices, loop))
        # only one set of index columns is used per loop
        break
    return loops
コード例 #3
0
class miller_array_builder(crystal_symmetry_builder):
    # Changes to this class should pass regression tests:
    # cctbx_project\mmtbx\regression\tst_cif_as_mtz_wavelengths.py
    # cctbx_project\iotbx\cif\tests\tst_lex_parse_build.py
    # phenix_regression\cif_as_mtz\tst_cif_as_mtz.py

    observation_types = {
        # Known column-label prefixes mapped to the observation type (amplitude
        # or intensity) their data should be tagged with, as per
        # https://www.iucr.org/__data/iucr/cifdic_html/2/cif_mm.dic/index.html
        # guess_observationtype() matches a label against these keys with
        # str.startswith(), so each key acts as a prefix rather than an exact name.
        '_refln.F_squared': xray.intensity(),
        '_refln_F_squared': xray.intensity(),
        '_refln.intensity': xray.intensity(),
        '_refln.I(+)': xray.intensity(),
        '_refln.I(-)': xray.intensity(),
        '_refln.F_calc': xray.amplitude(),
        '_refln.F_meas': xray.amplitude(),
        '_refln.FP': xray.amplitude(),
        '_refln.F-obs': xray.amplitude(),
        '_refln.Fobs': xray.amplitude(),
        '_refln.F-calc': xray.amplitude(),
        '_refln.Fcalc': xray.amplitude(),
        '_refln.pdbx_F_': xray.amplitude(),
        '_refln.pdbx_I_': xray.intensity(),
        '_refln.pdbx_anom_difference': xray.amplitude(),
    }

    def guess_observationtype(self, labl):
        for okey in self.observation_types.keys():
            if labl.startswith(okey):
                return self.observation_types[okey]
        return None

    def __init__(self, cif_block, base_array_info=None, wavelengths=None):
        """Build miller arrays from the reflection loops of *cif_block*.

        cif_block: the CIF block to read; the loops holding Miller indices are
          located with get_miller_indices_containing_loops().
        base_array_info: optional array info. When given, its
          crystal_symmetry_from_file is joined into the crystal symmetry and
          its labels (if any) are prepended to the labels of every array built
          here. When omitted, miller.array_info(source_type="cif") is used.
        wavelengths: optional mapping queried with each wavelength id to obtain
          the wavelength stored in the info of the arrays; defaults to {}.

        The results are stored in self._arrays (keyed by column label plus an
        optional ",<id suffix>") and the raw per-column data in
        self._origarrays.

        Raises CifBuilderError when no array could be built.
        """
        crystal_symmetry_builder.__init__(self, cif_block)
        self._arrays = OrderedDict()
        self._origarrays = OrderedDict(
        )  # used for presenting raw data tables in HKLviewer
        basearraylabels = []
        if base_array_info is not None:
            self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
                other_symmetry=base_array_info.crystal_symmetry_from_file,
                force=True)
            if base_array_info.labels:
                basearraylabels = base_array_info.labels
        if (wavelengths is None):
            wavelengths = {}
        if base_array_info is None:
            base_array_info = miller.array_info(source_type="cif")
        refln_containing_loops = self.get_miller_indices_containing_loops()
        # NOTE: self.indices is rebound on every iteration; the helper methods
        # get_selection() and flex_std_string_as_miller_array() read it.
        for self.indices, refln_loop in refln_containing_loops:
            self.wavelength_id_array = None
            self.crystal_id_array = None
            self.scale_group_array = None
            # [None] means "no sub-selection" for the corresponding id below
            wavelength_ids = [None]
            crystal_ids = [None]
            scale_groups = [None]
            for key, value in six.iteritems(refln_loop):
                # Get wavelength_ids, crystal_id, scale_group_code columns for selecting data of other
                # columns in self.get_selection() used by self.flex_std_string_as_miller_array()
                if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                        or key.endswith('scale_group_code')):
                    data = as_int_or_none_if_all_question_marks(
                        value, column_name=key)
                    if data is None:
                        continue
                    counts = data.counts()
                    # wavelength ids are recorded even when there is only one,
                    # so that wavelengths.get(w_id) can still be looked up
                    if key.endswith('wavelength_id'):
                        wavelength_ids = list(counts.keys())
                    # a single distinct value needs no selection array
                    if len(counts) == 1: continue
                    array = miller.array(
                        miller.set(self.crystal_symmetry,
                                   self.indices).auto_anomalous(), data)
                    if key.endswith('wavelength_id'):
                        self.wavelength_id_array = array
                        wavelength_ids = list(counts.keys())
                    elif key.endswith('crystal_id'):
                        self.crystal_id_array = array
                        crystal_ids = list(counts.keys())
                    elif key.endswith('scale_group_code'):
                        self.scale_group_array = array
                        scale_groups = list(counts.keys())
            labelsuffix = []
            wavelbl = []
            cryslbl = []
            scalegrplbl = []
            self._origarrays["HKLs"] = self.indices
            alllabels = list(sorted(refln_loop.keys()))
            remaininglabls = alllabels[:]  # shallow copy of the list
            # Parse labels matching cif column conventions
            # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html
            # and extract groups of labels or just single columns.
            # Groups corresponds to the map coefficients, phase and amplitudes,
            # amplitudes or intensities with sigmas and hendrickson-lattman columns.
            # Each helper returns the groups it recognised plus the labels it
            # left over, so the order of these calls decides which grouping wins.
            phaseamplabls, remaininglabls = self.get_phase_amplitude_labels(
                remaininglabls)
            mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels(
                remaininglabls)
            HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls)
            data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(
                remaininglabls)
            for w_id in wavelength_ids:
                for crys_id in crystal_ids:
                    for scale_group in scale_groups:
                        # If reflection data files contain more than one crystal, wavelength or scalegroup
                        # then add their id(s) as a suffix to data labels computed below. Needed for avoiding
                        # ambiguity but avoid when not needed to make labels more human readable!
                        if (len(wavelength_ids) > 1
                                or len(wavelengths) > 1) and w_id is not None:
                            wavelbl = ["wavelength_id=%i" % w_id]
                        if len(crystal_ids) > 1 and crys_id is not None:
                            cryslbl = ["crystal_id=%i" % crys_id]
                        if len(scale_groups) > 1 and scale_group is not None:
                            scalegrplbl = ["scale_group_code=%i" % scale_group]
                        labelsuffix = scalegrplbl + cryslbl + wavelbl
                        # jlablsufx is the same suffix as a string, appended to
                        # the dictionary keys of self._arrays / self._origarrays
                        jlablsufx = ""
                        if len(labelsuffix):
                            jlablsufx = "," + ",".join(labelsuffix)
                        for mapcoefflabl in mapcoefflabls:
                            A_array = refln_loop[mapcoefflabl[0]]
                            B_array = refln_loop[mapcoefflabl[1]]
                            # deselect any ? marks in the two arrays, assuming both A and B have the same ? marks
                            selection = self.get_selection(
                                A_array,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            A_array = A_array.select(selection)
                            B_array = B_array.select(selection)
                            # form the miller array with map coefficients
                            data = flex.complex_double(flex.double(A_array),
                                                       flex.double(B_array))
                            millarr = miller.array(
                                miller.set(self.crystal_symmetry,
                                           self.indices.select(
                                               selection)).auto_anomalous(),
                                data)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            # NOTE(review): millarr comes straight from miller.array(...) here, so this
                            # check presumably never triggers - confirm
                            if millarr is None: continue
                            labl = basearraylabels + mapcoefflabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=labl,
                                    wavelength=wavelengths.get(w_id, None)))
                            self._arrays[mapcoefflabl[0] + jlablsufx] = millarr
                        for phaseamplabl in phaseamplabls:
                            amplitudestrarray = refln_loop[phaseamplabl[0]]
                            phasestrarray = refln_loop[phaseamplabl[1]]
                            millarr = self.flex_std_string_as_miller_array(
                                amplitudestrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            phasesmillarr = self.flex_std_string_as_miller_array(
                                phasestrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None or phasesmillarr is None:
                                continue
                            phases = as_flex_double(phasesmillarr,
                                                    phaseamplabl[1])
                            # phases are passed on as degrees (deg=True)
                            millarr = millarr.phase_transfer(phases, deg=True)
                            labl = basearraylabels + phaseamplabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=labl,
                                    wavelength=wavelengths.get(w_id, None)))
                            self._arrays[phaseamplabl[0] + jlablsufx] = millarr
                        for datlabl, siglabl, otype in data_sig_obstype_labls:
                            datastrarray = refln_loop[datlabl]
                            millarr = self.flex_std_string_as_miller_array(
                                datastrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None: continue
                            millarr = as_flex_double(millarr, datlabl)
                            datsiglabl = [datlabl]
                            if siglabl:
                                sigmasstrarray = refln_loop[siglabl]
                                sigmas = self.flex_std_string_as_miller_array(
                                    sigmasstrarray,
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                # NOTE(review): flex_std_string_as_miller_array() returns None for an
                                # empty selection and sigmas is not checked before use; the sigma
                                # column is also filtered on its own '.'/'?' entries, so its length
                                # may differ from the data column - confirm both are safe
                                sigmas = as_flex_double(sigmas, siglabl)
                                millarr.set_sigmas(sigmas.data())
                                datsiglabl = [datlabl, siglabl]
                            datsiglabl = basearraylabels + datsiglabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=datsiglabl,
                                    wavelength=wavelengths.get(w_id, None)))
                            if otype is not None:
                                millarr.set_observation_type(otype)
                            self._arrays[datlabl + jlablsufx] = millarr
                        for hl_labels in HLcoefflabls:
                            # the four HL columns are fetched from the block
                            # itself rather than from refln_loop
                            hl_values = [
                                cif_block.get(hl_key) for hl_key in hl_labels
                            ]
                            if hl_values.count(None) == 0:
                                selection = self.get_selection(
                                    hl_values[0],
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                hl_values = [
                                    as_double_or_none_if_all_question_marks(
                                        hl.select(selection), column_name=lab)
                                    for hl, lab in zip(hl_values, hl_labels)
                                ]
                                # hl_values will be None for column data not matching w_id,crys_id,scale_group values
                                if hl_values == [None, None, None, None]:
                                    continue
                                millarr = miller.array(
                                    miller.set(
                                        self.crystal_symmetry,
                                        self.indices.select(
                                            selection)).auto_anomalous(),
                                    flex.hendrickson_lattman(*hl_values))
                                hlabels = basearraylabels + hl_labels + labelsuffix
                                millarr.set_info(
                                    base_array_info.customized_copy(
                                        labels=hlabels,
                                        wavelength=wavelengths.get(w_id,
                                                                   None)))
                                self._arrays[hl_labels[0] +
                                             jlablsufx] = millarr
                        # pick up remaining columns if any that weren't identified above
                        for label in alllabels:
                            if "index_" in label:
                                continue
                            datastrarray = refln_loop[label]
                            if label in remaininglabls:
                                labels = basearraylabels + [label
                                                            ] + labelsuffix
                                lablsufx = jlablsufx
                                millarr = self.flex_std_string_as_miller_array(
                                    datastrarray,
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                # millarr will be None for column data not matching w_id,crys_id,scale_group values
                                if (label.endswith(
                                        'wavelength_id'
                                ) or label.endswith(
                                        'crystal_id'
                                ) or  # get full array if any of these labels, not just subsets
                                        label.endswith('scale_group_code')):
                                    millarr = self.flex_std_string_as_miller_array(
                                        datastrarray,
                                        wavelength_id=None,
                                        crystal_id=None,
                                        scale_group_code=None)
                                    lablsufx = ""
                                    labels = basearraylabels + [label]
                                if millarr is None: continue
                                otype = self.guess_observationtype(label)
                                if otype is not None:
                                    millarr.set_observation_type(otype)
                                millarr.set_info(
                                    base_array_info.customized_copy(
                                        labels=labels,
                                        wavelength=wavelengths.get(w_id,
                                                                   None)))
                                self._arrays[label + lablsufx] = millarr
                            # every non-index column, grouped or not, is also
                            # kept as raw data under its label with the
                            # "_refln." / "_refln_" prefix stripped
                            origarr = self.flex_std_string_as_miller_array(
                                datastrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            newlabel = label.replace("_refln.", "")
                            newlabel2 = newlabel.replace("_refln_", "")
                            if origarr:  # want only genuine miller arrays
                                self._origarrays[newlabel2 +
                                                 jlablsufx] = origarr.data()
        # Convert any groups of I+,I-,SigI+,SigI- (or amplitudes) arrays into anomalous arrays
        # i.e. both friedel mates in the same array
        # Iterate over a copy because self._arrays is modified inside the loop.
        for key, array in six.iteritems(self._arrays.copy()):
            # plus_key is reset to "" for every key; when no branch below
            # matches, the `plus_key in self._arrays` test fails first and
            # short-circuits, so a missing or stale minus_key is never looked up
            plus_key = ""
            if '_minus' in key:
                minus_key = key
                plus_key = key.replace('_minus', '_plus')
            elif '-' in key:
                minus_key = key
                plus_key = key.replace('-', '+')
            elif '_plus' in key:
                plus_key = key
                minus_key = key.replace('_plus', '_minus')
            elif '+' in key:
                plus_key = key
                minus_key = key.replace('+', '-')
            if plus_key in self._arrays and minus_key in self._arrays:
                plus_array = self._arrays.pop(plus_key)
                minus_array = self._arrays.pop(minus_key)
                # negate the indices of the minus array before merging
                minus_array = minus_array.customized_copy(
                    indices=-minus_array.indices()).set_info(
                        minus_array.info())
                array = plus_array.concatenate(
                    minus_array, assert_is_similar_symmetry=False)
                array = array.customized_copy(anomalous_flag=True)
                array.set_info(minus_array.info().customized_copy(labels=list(
                    OrderedSet(plus_array.info().labels +
                               minus_array.info().labels))))
                array.set_observation_type(plus_array.observation_type())
                # the merged array is stored under whichever of the two keys
                # was encountered first
                self._arrays.setdefault(key, array)
        if len(self._arrays) == 0:
            raise CifBuilderError("No reflection data present in cif block")
        # Sort the ordered dictionary to resemble the order of columns in the cif file
        # This is to avoid any F_meas arrays accidentally being put adjacent to
        # pdbx_anom_difference arrays in the self._arrays OrderedDict. Otherwise these
        # arrays may unintentionally be combined into a reconstructed anomalous amplitude
        # array when saving as an mtz file due to a problem in the iotbx/mtz module.
        # See http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html
        # NOTE(review): refln_loop below is whatever the loop over
        # refln_containing_loops bound last, so with several reflection loops
        # only the columns of the last one are considered and the assert may
        # fail - confirm that a single loop per block is guaranteed
        arrlstord = []
        arrlst = list(self._arrays)
        for arr in arrlst:
            for i, k in enumerate(refln_loop.keys()):
                # the part before the first "," is the plain column label
                if arr.split(",")[0] == k:
                    arrlstord.append((arr, i))
        # arrlstord must have the same keys as in the self._arrays dictionary
        assert sorted(arrlst) == sorted([e[0] for e in arrlstord])
        sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1])
        self._ordarrays = OrderedDict()
        for sortkey, i in sortarrlst:
            self._ordarrays.setdefault(sortkey, self._arrays[sortkey])
        self._arrays = self._ordarrays

    def get_HL_labels(self, keys):
        """Group Hendrickson-Lattman coefficient labels into quadruples.

        Hendrickson-Lattman labels could look like 'HLAM', 'HLBM', 'HLCM',
        'HLDM' or like 'HLanomA', 'HLanomB', 'HLanomC', 'HLanomD'. A regular
        expression splits each label around its A/B/C/D letter and labels
        sharing the same surrounding text are grouped together. Only groups of
        exactly four labels are accepted.

        Returns a tuple (HLquads, remainingkeys): HLquads is a list of
        four-label lists, in order of first appearance in *keys*, and
        remainingkeys holds the labels of *keys* not used in any quadruple,
        in their original order.
        """
        lstkeys = list(keys)  # cast into list if not a list
        alllabels = " ".join(lstkeys)
        allmatches = re.findall(r"(\S*(HL(\S*)[abcdABCD](\S*)))", alllabels)
        # Bucket the full labels by the text surrounding the A/B/C/D letter.
        # The separate tag list keeps first-appearance order so the result does
        # not depend on set/hash ordering.
        tags = []
        labels_by_tag = {}
        for hm in allmatches:
            tag = (hm[2], hm[3])
            if tag not in labels_by_tag:
                labels_by_tag[tag] = []
                tags.append(tag)
            labels_by_tag[tag].append(hm[0])
        HLquads = [
            labels_by_tag[tag] for tag in tags
            if len(labels_by_tag[tag]) == 4
        ]
        usedkeys = set()
        for quad in HLquads:
            usedkeys.update(quad)
        remainingkeys = [e for e in lstkeys if e not in usedkeys]
        return HLquads, remainingkeys

    def get_mapcoefficient_labels(self, keys):
        """Extract map coefficient label pairs from a list of cif column labels.

        A label of the form '_refln.A_<rest>' (or '_refln_A_<rest>') is paired
        with the label obtained by replacing 'A_' with 'B_', if that label is
        present, e.g. ( _refln.A_calc_au _refln.B_calc_au ) and
        ( _refln.A_calc _refln.B_calc ).

        Returns a tuple (mapcoefflabels, remainingkeys): mapcoefflabels is a
        list of [A_label, B_label] lists and remainingkeys holds the labels of
        *keys* that were not paired, in their original order.
        """
        lstkeys = list(keys)  # cast into list if not a list
        remainingkeys = lstkeys[:]  # shallow copy of the list
        # Match every label on its own. Searching the space-joined labels with
        # a leading \s* captured the separating blank as part of the label, so
        # only an A label at the very start of the string could ever be paired.
        a_label_for_b = {}
        for key in lstkeys:
            m = re.match(r"(_refln[\._]A_)(\S*)$", key)
            if m is not None:
                blabel = m.group(1).replace("A_", "B_") + m.group(2)
                a_label_for_b[blabel] = key
        mapcoefflabels = []
        for label in lstkeys:
            alabel = a_label_for_b.get(label)
            if alabel is None:
                continue
            # only pair columns that have not been claimed already
            if alabel in remainingkeys and label in remainingkeys:
                mapcoefflabels.append([alabel, label])
                remainingkeys.remove(alabel)
                remainingkeys.remove(label)
        return mapcoefflabels, remainingkeys

    def get_phase_amplitude_labels(self, keys):
        """Extract [amplitude, phase] label pairs from a list of cif column labels.

        Four naming conventions are tried in turn, e.g.
        ( _refln.FWT _refln.PHWT ), ( _refln.FC_ALL _refln.PHIC_ALL ),
        ( _refln.DELFWT _refln.PHDELWT ) and ( _refln.F_calc _refln.phase_calc ).
        A column is used in at most one pair: once claimed it is ignored by
        later matches instead of making list.remove() raise ValueError.

        Returns a tuple (phase_amplitudelabels, remainingkeys):
        phase_amplitudelabels is a list of [amplitude_label, phase_label] lists
        and remainingkeys holds the labels of *keys* that were not paired, in
        their original order.
        """
        lstkeys = list(keys)  # cast into list if not a list
        remainingkeys = lstkeys[:]  # shallow copy of the list
        phase_amplitudelabels = []

        def pair_up(amplitudelabel, phaselabel):
            # record the pair only while both columns are still unclaimed
            if (amplitudelabel != phaselabel
                    and amplitudelabel in remainingkeys
                    and phaselabel in remainingkeys):
                phase_amplitudelabels.append([amplitudelabel, phaselabel])
                remainingkeys.remove(amplitudelabel)
                remainingkeys.remove(phaselabel)

        alllabels = " ".join(lstkeys)
        PHmatches = re.findall(
            r"((\S*PH)([^I]\S*))", alllabels
        )  # [('_refln.PHWT', '_refln.PH', 'WT'), ('_refln.PHDELWT', '_refln.PH', 'DELWT')]
        for label in lstkeys:
            for m in PHmatches:
                PFlabel = m[1].replace("PH", "F") + m[2]
                Flabel = m[1].replace("PH", "") + m[2]
                if Flabel == label or PFlabel == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        PHImatches = re.findall(
            r"((\S*PHI)(\S*))", alllabels
        )  # [('_refln.PHIC', '_refln.PHI', 'C'), ('_refln.PHIC_ALL', '_refln.PHI', 'C_ALL')]
        for label in lstkeys:
            for m in PHImatches:
                PFlabel = m[1].replace("PHI", "F") + m[2]
                Flabel = m[1].replace("PHI", "") + m[2]
                if Flabel == label or PFlabel == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        PHDELmatches = re.findall(
            r"(((\S*)PH)([^I]\S*(WT)))", alllabels
        )  # [('_refln.PHDELWT', '_refln.PH', '_refln.', 'DELWT', 'WT')]
        for label in lstkeys:
            for m in PHDELmatches:
                Flabel = m[2] + m[3].replace("WT", "FWT")
                if Flabel == label:
                    pair_up(label, m[0])
        alllabels = " ".join(remainingkeys)
        phase_matches = re.findall(
            r"((\S*[\._])phase(\S*))",
            alllabels)  # [('_refln.phase_calc', '_refln.', '_calc')]
        for label in lstkeys:
            for m in phase_matches:
                Flabl = m[1] + m[2]
                Flabel = m[1] + "F" + m[2]
                Faulabel = m[1] + "F" + m[2] + "_au"
                # substring tests, in case of _refln.F_calc_au and _refln.phase_calc
                if not (Flabl in label or Flabel in label
                        or Faulabel in label):
                    continue
                if (Flabel + "_sigma_au") in remainingkeys or (
                        Flabel + "_sigma") in remainingkeys:
                    continue  # give priority to F_meas, F_meas_sigma or  F_meas_au, F_meas_sigma_au
                pair_up(label, m[0])
        return phase_amplitudelabels, remainingkeys

    def get_FSigF_ISigI_labels(self, keys):
        # extract amplitudea, sigmas or intensitiy, sigmas labels from list of cif column labels
        # e.g. ( _refln.F_meas_sigma_au _refln.F_meas), ( _refln.intensity_sigma _refln.intensity ) ,
        # ( _refln.pdbx_I_plus_sigma _refln.pdbx_I_plus )
        lstkeys = list(keys)  # cast into list if not a list
        remainingkeys = lstkeys[:]  # deep copy the list
        alllabels = " ".join(lstkeys)
        labelpairs = []
        sigma_matches = re.findall(
            r"((\S*[\._])SIG(\S*))",
            alllabels)  # catch label pairs like F(+),SIGF(+)
        for label in lstkeys:
            for m in sigma_matches:
                FIlabel = m[1] + m[2]
                if FIlabel == label:
                    labelpairs.append(
                        [label, m[0],
                         self.guess_observationtype(label)])
                    remainingkeys.remove(label)
                    remainingkeys.remove(m[0])
        alllabels = " ".join(remainingkeys)
        sigma_matches = re.findall(
            r"((\S*)_sigma(_*\S*))", alllabels
        )  # [('_refln.F_meas_sigma_au', '_refln.F_meas', '_au'), ('_refln.intensity_sigma', '_refln.intensity', ''), ('_refln.pdbx_I_plus_sigma', '_refln.pdbx_I_plus', '')]
        for label in lstkeys:
            for m in sigma_matches:
                FIlabel = m[1] + m[2]
                if FIlabel == label:
                    labelpairs.append(
                        [label, m[0],
                         self.guess_observationtype(label)])
                    remainingkeys.remove(label)
                    remainingkeys.remove(m[0])
        alllabels = " ".join(remainingkeys)
        # catch generic meas and sigma labels
        anymeas_matches = re.findall(r"((\S*)_meas(\S*))",
                                     alllabels) + re.findall(
                                         r"((\S*)_calc(\S*))", alllabels)
        anysigma_matches = re.findall(r"((\S*)_sigma(\S*))", alllabels)
        for mmatch in anymeas_matches:
            for smatch in anysigma_matches:
                if mmatch[1] == smatch[1] and mmatch[2] == smatch[2]:
                    remainingkeys.remove(mmatch[0])
                    if smatch[
                            0] in remainingkeys:  # in case of say F_squared_calc, F_squared_meas, F_squared_sigma all being present
                        remainingkeys.remove(smatch[0])
                        labelpairs.append([
                            mmatch[0], smatch[0],
                            self.guess_observationtype(mmatch[0])
                        ])
                    else:
                        labelpairs.append([
                            mmatch[0], None,
                            self.guess_observationtype(mmatch[0])
                        ])
        return labelpairs, remainingkeys

    def get_miller_indices_containing_loops(self):
        loops = []
        for loop in self.cif_block.loops.values():
            for key in loop.keys():
                if 'index_h' not in key: continue
                hkl_str = [
                    loop.get(key.replace('index_h', 'index_%s' % i))
                    for i in 'hkl'
                ]
                if hkl_str.count(None) > 0:
                    raise CifBuilderError(
                        "Miller indices missing from current CIF block (%s)" %
                        key.replace('index_h',
                                    'index_%s' % 'hkl'[hkl_str.index(None)]))
                hkl_int = []
                for i, h_str in enumerate(hkl_str):
                    try:
                        h_int = flex.int(h_str)
                    except ValueError as e:
                        raise CifBuilderError(
                            "Invalid item for Miller index %s: %s" %
                            ("HKL"[i], str(e)))
                    hkl_int.append(h_int)
                indices = flex.miller_index(*hkl_int)
                loops.append((indices, loop))
                break
        return loops

    def get_selection(self,
                      value,
                      wavelength_id=None,
                      crystal_id=None,
                      scale_group_code=None):
        selection = ~((value == '.') | (value == '?'))
        if self.wavelength_id_array is not None and wavelength_id is not None:
            selection &= (self.wavelength_id_array.data() == wavelength_id)
        if self.crystal_id_array is not None and crystal_id is not None:
            selection &= (self.crystal_id_array.data() == crystal_id)
        if self.scale_group_array is not None and scale_group_code is not None:
            selection &= (self.scale_group_array.data() == scale_group_code)
        return selection

    def flex_std_string_as_miller_array(self,
                                        value,
                                        wavelength_id=None,
                                        crystal_id=None,
                                        scale_group_code=None):
        """Turn a column of strings into a miller array.

        Only the data and indices matching the submitted wavelength_id,
        crystal_id and scale_group_code are used, or the full column if these
        are None. The selected strings are converted with flex.int and, should
        that raise ValueError, with flex.double. If both conversions fail the
        whole original column, including '.' and '?' entries, is kept together
        with all indices.

        Returns None when the resulting data is empty.
        """
        selection = self.get_selection(value,
                                       wavelength_id=wavelength_id,
                                       crystal_id=crystal_id,
                                       scale_group_code=scale_group_code)
        selected = value.select(selection)
        for convert in (flex.int, flex.double):
            try:
                data = convert(selected)
                indices = self.indices.select(selection)
            except ValueError:
                continue
            break
        else:
            # neither numeric conversion worked: fall back to the unfiltered
            # string column
            data = value
            indices = self.indices
        if data.size() == 0:
            return None
        miller_set = miller.set(self.crystal_symmetry, indices)
        return miller.array(miller_set.auto_anomalous(), data)

    def arrays(self):
        return self._arrays

    def origarrays(self):
        """
    return dictionary of raw data found in cif file cast into flex.double arrays
    or just string arrays as a fall back.
    """
        return self._origarrays