Ejemplo n.º 1
0
  def _scale(self):
    '''Actually scale all of the data together.'''

    from xia2.Handlers.Environment import debug_memory_usage
    debug_memory_usage()

    Journal.block(
        'scaling', self.get_scaler_xcrystal().get_name(), 'XSCALE',
        {'scaling model':'default (all)'})

    epochs = self._sweep_information.keys()
    epochs.sort()

    xscale = self.XScale()

    xscale.set_spacegroup_number(self._xds_spacegroup)
    xscale.set_cell(self._scalr_cell)

    Debug.write('Set CELL: %.2f %.2f %.2f %.2f %.2f %.2f' % \
                tuple(self._scalr_cell))
    Debug.write('Set SPACEGROUP_NUMBER: %d' % \
                self._xds_spacegroup)

    Debug.write('Gathering measurements for scaling')

    for epoch in epochs:

      # get the prepared reflections
      reflections = self._sweep_information[epoch][
          'prepared_reflections']

      # and the get wavelength that this belongs to
      dname = self._sweep_information[epoch]['dname']
      sname = self._sweep_information[epoch]['sname']

      # and the resolution range for the reflections
      intgr = self._sweep_information[epoch]['integrater']
      Debug.write('Epoch: %d' % epoch)
      Debug.write('HKL: %s (%s/%s)' % (reflections, dname, sname))

      resolution_low = intgr.get_integrater_low_resolution()
      resolution_high, _ = self._scalr_resolution_limits.get((dname, sname), (0.0, None))

      resolution = (resolution_high, resolution_low)

      xscale.add_reflection_file(reflections, dname, resolution)

    # set the global properties of the sample
    xscale.set_crystal(self._scalr_xname)
    xscale.set_anomalous(self._scalr_anomalous)

    debug_memory_usage()
    xscale.run()

    scale_factor = xscale.get_scale_factor()

    Debug.write('XSCALE scale factor found to be: %e' % scale_factor)

    # record the log file

    pname = self._scalr_pname
    xname = self._scalr_xname

    FileHandler.record_log_file('%s %s XSCALE' % \
                                (pname, xname),
                                os.path.join(self.get_working_directory(),
                                             'XSCALE.LP'))

    # check for outlier reflections and if a number are found
    # then iterate (that is, rerun XSCALE, rejecting these outliers)

    if not PhilIndex.params.dials.fast_mode and not PhilIndex.params.xds.keep_outliers:
      xscale_remove = xscale.get_remove()
      if xscale_remove:
        current_remove = []
        final_remove = []

        # first ensure that there are no duplicate entries...
        if os.path.exists(os.path.join(
            self.get_working_directory(),
            'REMOVE.HKL')):
          for line in open(os.path.join(
              self.get_working_directory(),
              'REMOVE.HKL'), 'r').readlines():
            h, k, l = map(int, line.split()[:3])
            z = float(line.split()[3])

            if not (h, k, l, z) in current_remove:
              current_remove.append((h, k, l, z))

          for c in xscale_remove:
            if c in current_remove:
              continue
            final_remove.append(c)

          Debug.write(
              '%d alien reflections are already removed' % \
              (len(xscale_remove) - len(final_remove)))

        else:
          # we want to remove all of the new dodgy reflections
          final_remove = xscale_remove

        remove_hkl = open(os.path.join(
            self.get_working_directory(),
            'REMOVE.HKL'), 'w')

        z_min = PhilIndex.params.xds.z_min
        rejected = 0

        # write in the old reflections
        for remove in current_remove:
          z = remove[3]
          if z >= z_min:
            remove_hkl.write('%d %d %d %f\n' % remove)
          else:
            rejected += 1
        Debug.write('Wrote %d old reflections to REMOVE.HKL' % \
                    (len(current_remove) - rejected))
        Debug.write('Rejected %d as z < %f' % \
                    (rejected, z_min))

        # and the new reflections
        rejected = 0
        used = 0
        for remove in final_remove:
          z = remove[3]
          if z >= z_min:
            used += 1
            remove_hkl.write('%d %d %d %f\n' % remove)
          else:
            rejected += 1
        Debug.write('Wrote %d new reflections to REMOVE.HKL' % \
                    (len(final_remove) - rejected))
        Debug.write('Rejected %d as z < %f' % \
                    (rejected, z_min))

        remove_hkl.close()

        # we want to rerun the finishing step so...
        # unless we have added no new reflections
        if used:
          self.set_scaler_done(False)

    if not self.get_scaler_done():
      Chatter.write('Excluding outlier reflections Z > %.2f' %
                    PhilIndex.params.xds.z_min)
      return

    debug_memory_usage()

    # now get the reflection files out and merge them with aimless

    output_files = xscale.get_output_reflection_files()
    wavelength_names = output_files.keys()

    # these are per wavelength - also allow for user defined resolution
    # limits a la bug # 3183. No longer...

    for epoch in self._sweep_information.keys():

      input = self._sweep_information[epoch]

      intgr = input['integrater']

      rkey = input['dname'], input['sname']

      if intgr.get_integrater_user_resolution():
        dmin = intgr.get_integrater_high_resolution()

        if rkey not in self._user_resolution_limits:
          self._scalr_resolution_limits[rkey] = (dmin, None)
          self._user_resolution_limits[rkey] = dmin
        elif dmin < self._user_resolution_limits[rkey]:
          self._scalr_resolution_limits[rkey] = (dmin, None)
          self._user_resolution_limits[rkey] = dmin

    self._scalr_scaled_refl_files = { }

    self._scalr_statistics = { }

    max_batches = 0
    mtz_dict = { }

    project_info = { }
    for epoch in self._sweep_information.keys():
      pname = self._scalr_pname
      xname = self._scalr_xname
      dname = self._sweep_information[epoch]['dname']
      reflections = os.path.split(
          self._sweep_information[epoch]['prepared_reflections'])[-1]
      project_info[reflections] = (pname, xname, dname)

    for epoch in self._sweep_information.keys():
      self._sweep_information[epoch]['scaled_reflections'] = None

    debug_memory_usage()

    for wavelength in wavelength_names:
      hklin = output_files[wavelength]

      xsh = XDSScalerHelper()
      xsh.set_working_directory(self.get_working_directory())

      ref = xsh.split_and_convert_xscale_output(
          hklin, 'SCALED_', project_info, 1.0 / scale_factor)

      for hklout in ref.keys():
        for epoch in self._sweep_information.keys():
          if os.path.split(self._sweep_information[epoch][
              'prepared_reflections'])[-1] == \
              os.path.split(hklout)[-1]:
            if self._sweep_information[epoch][
                'scaled_reflections'] is not None:
              raise RuntimeError, 'duplicate entries'
            self._sweep_information[epoch][
                'scaled_reflections'] = ref[hklout]

      del(xsh)

    debug_memory_usage()

    for epoch in self._sweep_information.keys():
      hklin = self._sweep_information[epoch]['scaled_reflections']
      dname = self._sweep_information[epoch]['dname']
      sname = self._sweep_information[epoch]['sname']

      hkl_copy = os.path.join(self.get_working_directory(),
                              'R_%s' % os.path.split(hklin)[-1])

      if not os.path.exists(hkl_copy):
        shutil.copyfile(hklin, hkl_copy)

      # let's properly listen to the user's resolution limit needs...

      if self._user_resolution_limits.get((dname, sname), False):
        resolution = self._user_resolution_limits[(dname, sname)]

      else:
        if PhilIndex.params.xia2.settings.resolution.keep_all_reflections == True:
          try:
            resolution = intgr.get_detector().get_max_resolution(intgr.get_beam_obj().get_s0())
            Debug.write('keep_all_reflections set, using detector limits')
          except Exception:
            resolution = self._estimate_resolution_limit(hklin)
        else:
          resolution = self._estimate_resolution_limit(hklin)

      Chatter.write('Resolution for sweep %s/%s: %.2f' % \
                    (dname, sname, resolution))

      if (dname, sname) not in self._scalr_resolution_limits:
        self._scalr_resolution_limits[(dname, sname)] = (resolution, None)
        self.set_scaler_done(False)
      else:
        if resolution < self._scalr_resolution_limits[(dname, sname)][0]:
          self._scalr_resolution_limits[(dname, sname)] = (resolution, None)
          self.set_scaler_done(False)

    debug_memory_usage()

    if not self.get_scaler_done():
      Debug.write('Returning as scaling not finished...')
      return

    self._sort_together_data_xds()

    highest_resolution = min(limit for limit, _ in self._scalr_resolution_limits.values())

    self._scalr_highest_resolution = highest_resolution

    Debug.write('Scaler highest resolution set to %5.2f' % \
                highest_resolution)

    if not self.get_scaler_done():
      Debug.write('Returning as scaling not finished...')
      return

    sdadd_full = 0.0
    sdb_full = 0.0

    # ---------- FINAL MERGING ----------

    sc = self._factory.Aimless()

    FileHandler.record_log_file('%s %s aimless' % (self._scalr_pname,
                                                   self._scalr_xname),
                                sc.get_log_file())

    sc.set_resolution(highest_resolution)
    sc.set_hklin(self._prepared_reflections)
    sc.set_new_scales_file('%s_final.scales' % self._scalr_xname)

    if sdadd_full == 0.0 and sdb_full == 0.0:
      pass
    else:
      sc.add_sd_correction('both', 1.0, sdadd_full, sdb_full)

    for epoch in epochs:
      input = self._sweep_information[epoch]
      start, end = (min(input['batches']), max(input['batches']))

      rkey = input['dname'], input['sname']
      run_resolution_limit, _ = self._scalr_resolution_limits[rkey]

      sc.add_run(start, end, exclude = False,
                 resolution = run_resolution_limit,
                 name = input['sname'])

    sc.set_hklout(os.path.join(self.get_working_directory(),
                               '%s_%s_scaled.mtz' % \
                               (self._scalr_pname, self._scalr_xname)))

    if self.get_scaler_anomalous():
      sc.set_anomalous()

    sc.multi_merge()

    FileHandler.record_xml_file('%s %s aimless xml' % (self._scalr_pname,
                                                       self._scalr_xname),
                                sc.get_xmlout())
    data = sc.get_summary()

    loggraph = sc.parse_ccp4_loggraph()

    standard_deviation_info = { }

    for key in loggraph.keys():
      if 'standard deviation v. Intensity' in key:
        dataset = key.split(',')[-1].strip()
        standard_deviation_info[dataset] = transpose_loggraph(
            loggraph[key])

    resolution_info = { }

    for key in loggraph.keys():
      if 'Analysis against resolution' in key:
        dataset = key.split(',')[-1].strip()
        resolution_info[dataset] = transpose_loggraph(
            loggraph[key])

    # and also radiation damage stuff...

    batch_info = { }

    for key in loggraph.keys():
      if 'Analysis against Batch' in key:
        dataset = key.split(',')[-1].strip()
        batch_info[dataset] = transpose_loggraph(
            loggraph[key])


    # finally put all of the results "somewhere useful"

    self._scalr_statistics = data

    self._scalr_scaled_refl_files = copy.deepcopy(
        sc.get_scaled_reflection_files())

    self._scalr_scaled_reflection_files = { }

    # also output the unmerged scalepack format files...

    sc = self._factory.Aimless()
    sc.set_resolution(highest_resolution)
    sc.set_hklin(self._prepared_reflections)
    sc.set_scalepack()

    for epoch in epochs:
      input = self._sweep_information[epoch]
      start, end = (min(input['batches']), max(input['batches']))

      rkey = input['dname'], input['sname']
      run_resolution_limit, _ = self._scalr_resolution_limits[rkey]

      sc.add_run(start, end, exclude = False,
                 resolution = run_resolution_limit,
                 name = input['sname'])

    sc.set_hklout(os.path.join(self.get_working_directory(),
                               '%s_%s_scaled.mtz' % \
                               (self._scalr_pname,
                                self._scalr_xname)))

    if self.get_scaler_anomalous():
      sc.set_anomalous()

    sc.multi_merge()

    self._scalr_scaled_reflection_files['sca_unmerged'] = { }
    self._scalr_scaled_reflection_files['mtz_unmerged'] = { }

    for dataset in sc.get_scaled_reflection_files().keys():
      hklout = sc.get_scaled_reflection_files()[dataset]

      # then mark the scalepack files for copying...

      scalepack = os.path.join(os.path.split(hklout)[0],
                               os.path.split(hklout)[1].replace(
          '_scaled', '_scaled_unmerged').replace('.mtz', '.sca'))
      self._scalr_scaled_reflection_files['sca_unmerged'][
          dataset] = scalepack
      FileHandler.record_data_file(scalepack)
      mtz_unmerged = os.path.splitext(scalepack)[0] + '.mtz'
      self._scalr_scaled_reflection_files['mtz_unmerged'][dataset] = mtz_unmerged
      FileHandler.record_data_file(mtz_unmerged)

    if PhilIndex.params.xia2.settings.merging_statistics.source == 'cctbx':
      for key in self._scalr_scaled_refl_files:
        stats = self._compute_scaler_statistics(
          self._scalr_scaled_reflection_files['mtz_unmerged'][key], wave=key)
        self._scalr_statistics[
          (self._scalr_pname, self._scalr_xname, key)] = stats

    # convert reflection files to .sca format - use mtz2various for this

    self._scalr_scaled_reflection_files['sca'] = { }
    self._scalr_scaled_reflection_files['hkl'] = { }

    for key in self._scalr_scaled_refl_files:

      f = self._scalr_scaled_refl_files[key]
      scaout = '%s.sca' % f[:-4]

      m2v = self._factory.Mtz2various()
      m2v.set_hklin(f)
      m2v.set_hklout(scaout)
      m2v.convert()

      self._scalr_scaled_reflection_files['sca'][key] = scaout
      FileHandler.record_data_file(scaout)

      if PhilIndex.params.xia2.settings.small_molecule == True:
        hklout = '%s.hkl' % f[:-4]

        m2v = self._factory.Mtz2various()
        m2v.set_hklin(f)
        m2v.set_hklout(hklout)
        m2v.convert_shelx()

        self._scalr_scaled_reflection_files['hkl'][key] = hklout
        FileHandler.record_data_file(hklout)