Beispiel #1
0
def _prepare_pointless_hklin(working_directory,
                             hklin,
                             phi_width):
  '''Return the path of a reflection file to hand to pointless.

  Unless the microcrystal or small molecule flag is set, hklin is first
  passed through remove_blank() (output "<name>_noblank.mtz" in
  working_directory) and the result is used from then on.

  Mtzdump is then used to read the batch numbers. If the batch range
  times phi_width is below 180 degrees, or the small molecule flag is set,
  that file is returned as is. Otherwise a Rebatch wrapper is configured
  with limit_batches() for the first 180 degrees worth of batches, writing
  "<name>_prepointless.mtz", and that path is returned.

  Both derived files are recorded with FileHandler as temporary files.'''

  def _derived_path(source, suffix):
    # working_directory / <file name of source minus its last 4 chars> + suffix
    stem = os.path.split(source)[-1][:-4]
    return os.path.join(working_directory, '%s%s' % (stem, suffix))

  # also remove blank images?

  if not (Flags.get_microcrystal() or Flags.get_small_molecule()):

    Debug.write('Excluding blank images')

    noblank = _derived_path(hklin, '_noblank.mtz')
    FileHandler.record_temporary_file(noblank)

    hklin = remove_blank(hklin, noblank)

  # work out how many batches the file spans

  dumper = Mtzdump()
  dumper.set_working_directory(working_directory)
  auto_logfiler(dumper)
  dumper.set_hklin(hklin)
  dumper.dump()

  batch_span = max(dumper.get_batches()) - min(dumper.get_batches())

  phi_limit = 180

  too_little_data = batch_span * phi_width < phi_limit
  if too_little_data or Flags.get_small_molecule():
    return hklin

  prepointless = _derived_path(hklin, '_prepointless.mtz')

  rebatcher = Rebatch()
  rebatcher.set_working_directory(working_directory)
  auto_logfiler(rebatcher)
  rebatcher.set_hklin(hklin)
  rebatcher.set_hklout(prepointless)

  first_batch = min(dumper.get_batches())
  last_batch = first_batch + int(phi_limit / phi_width)

  Debug.write('Preparing data for pointless - %d batches (%d degrees)' %
              (last_batch - first_batch, phi_limit))

  rebatcher.limit_batches(first_batch, last_batch)

  # the trimmed file is only an intermediate, so flag it for removal on exit
  FileHandler.record_temporary_file(prepointless)

  return prepointless
Beispiel #2
0
        def copyfree(self):
            '''Copy the free column from freein into hklin -> hklout.

            Exactly one hklin must be defined, along with hklout, freein
            and the name of the free column in freein. Mtzdump is run on
            hklin to obtain its resolution range, which is passed on as
            the resolution limit for the copied column.

            Returns the value of self.get_ccp4_status().

            Raises RuntimeError if no / more than one hklin is defined, if
            freein or its column name is not set, or if the post-run error
            checks fail (hklout is then removed on a best-effort basis).
            '''

            if not self._hklin_files:
                raise RuntimeError('no hklin files defined')

            if len(self._hklin_files) > 1:
                raise RuntimeError('can have only one hklin to update')

            hklin = self._hklin_files[0]

            # validate every setting up front, so that a missing one is
            # reported before any external program has been run
            self.check_hklout()
            if self._freein is None:
                raise RuntimeError('freein not defined')
            if self._freein_column is None:
                raise RuntimeError('freein column not defined')

            # get the resolution limit to give as a limit for the FreeR
            # column

            md = Mtzdump()
            md.set_working_directory(self.get_working_directory())
            md.set_hklin(hklin)
            md.dump()

            # tuple() so that the two-value format below works whether the
            # range comes back as a list or as a tuple
            resolution_range = tuple(md.get_resolution_range())

            self.add_command_line('hklin1')
            self.add_command_line(self._freein)
            self.add_command_line('hklin2')
            self.add_command_line(hklin)
            self.start()

            self.input('labin file_number 1 E1=%s' % self._freein_column)
            self.input('resolution file_number 1 %f %f' % resolution_range)
            self.input('labin file_number 2 all')

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()

            except RuntimeError as e:
                # something went wrong; remove the output file. Only
                # filesystem errors (e.g. the file was never written) are
                # ignored here.
                try:
                    os.remove(self.get_hklout())
                except OSError:
                    pass
                # re-raised by name rather than with a bare raise, so the
                # inner try/except cannot change which error propagates
                raise e

            return self.get_ccp4_status()
Beispiel #3
0
def FindFreeFlag(hklin):
    """Try to find the FREE column in hklin.

    A column named exactly "FreeR_flag" is preferred. Failing that, a
    single column whose name contains "free" (case-insensitive) is
    accepted. In either case the column must be of type "I".

    Returns the name of the column.

    Raises RuntimeError if no candidate column is found, if the selected
    column is not of type "I", or if more than one candidate is found.
    """

    # get the information we need here...

    mtzdump = Mtzdump()
    mtzdump.set_hklin(hklin)
    mtzdump.dump()
    columns = mtzdump.get_columns()

    # column name -> column type
    ctypes = {c[0]: c[1] for c in columns}

    if "FreeR_flag" in ctypes:
        if ctypes["FreeR_flag"] != "I":
            raise RuntimeError("FreeR_flag column found: type not I")

        return "FreeR_flag"

    # ok, so the usual one wasn't there, look for anything with "free"
    # in it...

    possibilities = [c for c in ctypes if "free" in c.lower()]

    if not possibilities:
        raise RuntimeError("no candidate FreeR_flag columns found")

    # an ambiguous match must be an error rather than an implicit None
    if len(possibilities) > 1:
        raise RuntimeError("Multiple candidate FreeR_flag columns found")

    if ctypes[possibilities[0]] != "I":
        raise RuntimeError("FreeR_flag column found (%s): type not I" %
                           possibilities[0])

    return possibilities[0]
Beispiel #4
0
    def find_columns(self):
      '''Identify columns to use with scaleit.

      Mtzdump is run on hklin and its column list is scanned for columns
      of type 'F' whose name starts with 'F' (up to the first '_'). Each
      such column is taken as the start of a group of consecutive columns:
      four (F, SIGF, DANO, SIGDANO) when self._anomalous is set, otherwise
      two (F, SIGF). The columns are assumed to appear in that order.

      The collected names are stored in self._columns and returned.

      Raises RuntimeError if a group starts too close to the end of the
      column list to be complete.
      '''

      # run mtzdump to get a list of columns out and also check that
      # this is a valid merged mtz file....

      self.check_hklin()

      md = Mtzdump()
      md.set_hklin(self.get_hklin())
      md.dump()

      # next get the column information - check that F columns are
      # present

      column_info = md.get_columns()

      # F, SIGF, DANO, SIGDANO if anomalous, F, SIGF if not
      group_size = 4 if self._anomalous else 2

      columns = []

      j = 0

      while j < len(column_info):
        name = column_info[j][0]
        column_type = column_info[j][1]

        if column_type == 'F' and name.split('_')[0] == 'F':
          group = column_info[j:j + group_size]

          # a truncated group means the ordering assumption is broken
          if len(group) < group_size:
            raise RuntimeError(
                'incomplete column group starting at %s' % name)

          columns.extend(c[0] for c in group)
          j += group_size

        else:
          j += 1

      # ok that should be all of the groups identified

      self._columns = columns

      return columns
Beispiel #5
0
def FindFreeFlag(hklin):
    '''Locate the free flag column in hklin and return its name.

    A column called 'FreeR_flag' wins outright; otherwise exactly one
    column with "free" in its name (any case) must exist. The chosen
    column has to be of type 'I'.

    Raises RuntimeError when there is no candidate, when there are
    several candidates, or when the chosen column is not of type 'I'.'''

    dumper = Mtzdump()
    dumper.set_hklin(hklin)
    dumper.dump()

    # column name -> column type
    type_of = dict((col[0], col[1]) for col in dumper.get_columns())

    standard = 'FreeR_flag'

    if standard in type_of:
        if type_of[standard] != 'I':
            raise RuntimeError('FreeR_flag column found: type not I')

        return standard

    # the conventional name is absent, so fall back to anything that
    # mentions "free"

    candidates = [name for name in type_of if 'free' in name.lower()]

    if not candidates:
        raise RuntimeError('no candidate FreeR_flag columns found')

    if len(candidates) != 1:
        raise RuntimeError('Multiple candidate FreeR_flag columns found')

    only = candidates[0]

    if type_of[only] != 'I':
        raise RuntimeError('FreeR_flag column found (%s): type not I' % only)

    return only
Beispiel #6
0
def FindFreeFlag(hklin):
  '''Try to find the FREE column in hklin.

  A column named exactly 'FreeR_flag' is preferred. Failing that, a single
  column whose name contains "free" (case-insensitive) is accepted. In
  either case the column must be of type 'I'.

  Returns the name of the column.

  Raises RuntimeError if no candidate column is found, if the selected
  column is not of type 'I', or if more than one candidate is found.'''

  # get the information we need here...

  mtzdump = Mtzdump()
  mtzdump.set_hklin(hklin)
  mtzdump.dump()
  columns = mtzdump.get_columns()

  # column name -> column type
  ctypes = { }

  for c in columns:
    ctypes[c[0]] = c[1]

  # exceptions are raised in call form, which is valid on both Python 2
  # and Python 3 (the "raise X, msg" form is Python 2 only)

  if 'FreeR_flag' in ctypes:
    if ctypes['FreeR_flag'] != 'I':
      raise RuntimeError('FreeR_flag column found: type not I')

    return 'FreeR_flag'

  # ok, so the usual one wasn't there, look for anything with "free"
  # in it...

  possibilities = [c for c in ctypes if 'free' in c.lower()]

  if not possibilities:
    raise RuntimeError('no candidate FreeR_flag columns found')

  if len(possibilities) == 1:
    if ctypes[possibilities[0]] != 'I':
      raise RuntimeError('FreeR_flag column found (%s): type not I' %
                         possibilities[0])

    return possibilities[0]

  raise RuntimeError('Multiple candidate FreeR_flag columns found')
Beispiel #7
0
        def update(self):
            '''Update the information for one reflection file.

            Exactly one hklin must be defined. Mtzdump is run on it to list
            its columns and datasets; every column other than H, K, L is
            named in the labin command. Optionally:

            - the first dataset is renamed (drename / dpname) when pname,
              xname and dname are all set;
            - new cell parameters are given for the first dataset (dcell);
            - a suffix is appended to every output column name (labout).

            Returns the value of self.get_ccp4_status().

            Raises RuntimeError if no / more than one hklin is defined, if
            hklin contains no datasets, or if the post-run error checks
            fail (hklout is then removed on a best-effort basis).
            '''

            if not self._hklin_files:
                raise RuntimeError('no hklin files defined')

            if len(self._hklin_files) > 1:
                raise RuntimeError('can have only one hklin to update')

            hklin = self._hklin_files[0]

            self.check_hklout()

            md = Mtzdump()
            md.set_hklin(hklin)
            md.dump()

            # every column apart from the Miller indices is carried over
            column_names = [c[0] for c in md.get_columns()
                            if c[0] not in ('H', 'K', 'L')]

            # FIXME 03/NOV/06 the dataset is identified by its number, not
            # by pname/xname/dname, as the latter gets confused if the
            # xname is a number...
            #
            # FIXME perhaps - ASSERT that we want only the information from
            # the first dataset here...

            dataset_ids = [md.get_dataset_info(d)['id']
                           for d in md.get_datasets()]

            # resolved before the job is started, so that a file with no
            # datasets is reported cleanly instead of failing mid-run
            if not dataset_ids:
                raise RuntimeError('no datasets found in %s' % hklin)

            dataset_id = dataset_ids[0]

            self.add_command_line('hklin1')
            self.add_command_line(hklin)
            self.start()

            if self._pname and self._xname and self._dname:
                self.input('drename file_number 1 %d %s %s' %
                           (dataset_id, self._xname, self._dname))
                self.input('dpname file_number 1 %d %s' %
                           (dataset_id, self._pname))

            labin_command = 'labin file_number 1'
            for number, column in enumerate(column_names, 1):
                labin_command += ' E%d=%s' % (number, column)

            self.input(labin_command)

            if self._new_cell_parameters:
                a, b, c, alpha, beta, gamma = self._new_cell_parameters
                self.input('dcell file_number 1 %d %f %f %f %f %f %f' %
                           (dataset_id, a, b, c, alpha, beta, gamma))

            if self._new_column_suffix:
                suffix = self._new_column_suffix
                labout_command = 'labout file_number 1'
                for number, column in enumerate(column_names, 1):
                    labout_command += ' E%d=%s_%s' % (number, column, suffix)

                self.input(labout_command)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()

            except RuntimeError as e:
                # something went wrong; remove the output file. Only
                # filesystem errors (e.g. the file was never written) are
                # ignored here.
                try:
                    os.remove(self.get_hklout())
                except OSError:
                    pass
                # re-raised by name rather than with a bare raise, so the
                # inner try/except cannot change which error propagates
                raise e

            return self.get_ccp4_status()
Beispiel #8
0
        def merge(self):
            '''Merge multiple reflection files into one file.

            Mtzdump is run on every hklin to gather its column names and
            spacegroup. All files must report the same spacegroup, and no
            column name (other than H, K, L) may occur more than once
            across the files. Each file is then given as hklinN on the
            command line, with a labin command naming its columns.

            Returns the value of self.get_ccp4_status().

            Raises RuntimeError if no hklin is defined, if the spacegroups
            differ, if a column name is duplicated, or if the post-run
            error checks fail (hklout is then removed on a best-effort
            basis).
            '''

            if not self._hklin_files:
                raise RuntimeError('no hklin files defined')

            self.check_hklout()

            # for each reflection file, need to gather the column names
            # and so on, to put in the cad input here - also check to see
            # if the column names clash... check also that the spacegroups
            # match up...

            spacegroup = None
            seen_columns = set()
            column_names_by_file = {}

            for hklin in self._hklin_files:
                md = Mtzdump()
                md.set_working_directory(self.get_working_directory())
                md.set_hklin(hklin)
                md.dump()
                columns = md.get_columns()
                spag = md.get_spacegroup()

                # the first file sets the reference spacegroup
                if spacegroup is None:
                    spacegroup = spag

                if spag != spacegroup:
                    raise RuntimeError('spacegroups do not match')

                names = []

                for c in columns:
                    name = c[0]
                    if name in ('H', 'K', 'L'):
                        continue
                    if name in seen_columns:
                        raise RuntimeError('duplicate column names')
                    seen_columns.add(name)
                    names.append(name)

                column_names_by_file[hklin] = names

            # if we get to here then this is a good set up...

            # create the command line

            for file_number, hklin in enumerate(self._hklin_files, 1):
                self.add_command_line('hklin%d' % file_number)
                self.add_command_line(hklin)

            self.start()

            for file_number, hklin in enumerate(self._hklin_files, 1):
                labin_command = 'labin file_number %d' % file_number
                for number, column in enumerate(
                        column_names_by_file[hklin], 1):
                    labin_command += ' E%d=%s' % (number, column)

                self.input(labin_command)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()

            except RuntimeError as e:
                # something went wrong; remove the output file. Only
                # filesystem errors (e.g. the file was never written) are
                # ignored here.
                try:
                    os.remove(self.get_hklout())
                except OSError:
                    pass
                # re-raised by name rather than with a bare raise, so the
                # inner try/except cannot change which error propagates
                raise e

            return self.get_ccp4_status()
Beispiel #9
0
if __name__ == '__main__':

  # ok, in here (which will be "autoCHEF") this will inspect the MTZ
  # file and run with DOSE if such a column exists, else will run with
  # BATCH. N.B. this will require a fix above.
  #
  # Every command-line argument is handed to Mtzdump as an hklin file.

  chef = Chef(stream = Stdout)

  # name of the dose column to use; stays None until a file with a usable
  # DOSE column has been seen
  dose_column = None

  # NOTE(review): not used in the visible part of this script - presumably
  # consumed further on; confirm.
  overall_dmin = None

  for argv in sys.argv[1:]:

    md = Mtzdump()
    md.set_hklin(argv)
    md.dump()

    # just the column names
    columns = [c[0] for c in md.get_columns()]

    # once a dose column has been chosen, every later file is required to
    # contain it too and is not inspected any further
    if dose_column:
      assert(dose_column in columns)
      continue

    if 'DOSE' in columns:

      # DOSE is only selected when the first two values of its column range
      # differ (presumably min and max, i.e. the column is not constant -
      # confirm against Mtzdump.get_column_range)
      dose_range = md.get_column_range('DOSE')[:2]
      if dose_range[0] != dose_range[1]:
        dose_column = 'DOSE'
Beispiel #10
0
if __name__ == '__main__':

  # ok, in here (which will be "autoCHEF") this will inspect the MTZ
  # file and run with DOSE if such a column exists, else will run with
  # BATCH. N.B. this will require a fix above.
  #
  # Every command-line argument is handed to Mtzdump as an hklin file.

  chef = Chef(stream = Stdout)

  # name of the dose column to use; stays None until a file with a usable
  # DOSE column has been seen
  dose_column = None

  # NOTE(review): not used in the visible part of this script - presumably
  # consumed further on; confirm.
  overall_dmin = None

  for argv in sys.argv[1:]:

    md = Mtzdump()
    md.set_hklin(argv)
    md.dump()

    # just the column names
    columns = [c[0] for c in md.get_columns()]

    # once a dose column has been chosen, every later file is required to
    # contain it too and is not inspected any further
    if dose_column:
      assert(dose_column in columns)
      continue

    if 'DOSE' in columns:

      # DOSE is only selected when the first two values of its column range
      # differ (presumably min and max, i.e. the column is not constant -
      # confirm against Mtzdump.get_column_range)
      dose_range = md.get_column_range('DOSE')[:2]
      if dose_range[0] != dose_range[1]:
        dose_column = 'DOSE'
Beispiel #11
0
        def update(self):
            """Update the information for one reflection file.

            Exactly one hklin must be defined. Mtzdump is run on it to list
            its columns and datasets; every column other than H, K, L is
            named in the labin command. Optionally:

            - the first dataset is renamed (drename / dpname) when pname,
              xname and dname are all set;
            - new cell parameters are given for the first dataset (dcell);
            - a suffix is appended to every output column name (labout).

            Returns the value of self.get_ccp4_status().

            Raises RuntimeError if no / more than one hklin is defined, if
            hklin contains no datasets, or if the post-run error checks
            fail (hklout is then removed on a best-effort basis).
            """

            if not self._hklin_files:
                raise RuntimeError("no hklin files defined")

            if len(self._hklin_files) > 1:
                raise RuntimeError("can have only one hklin to update")

            hklin = self._hklin_files[0]

            self.check_hklout()

            md = Mtzdump()
            md.set_hklin(hklin)
            md.dump()

            # every column apart from the Miller indices is carried over
            column_names = [
                c[0] for c in md.get_columns() if c[0] not in ("H", "K", "L")
            ]

            # the dataset is addressed by its numerical id
            #
            # FIXME perhaps - ASSERT that we want only the information from
            # the first dataset here...

            dataset_ids = [md.get_dataset_info(d)["id"] for d in md.get_datasets()]

            # resolved before the job is started, so that a file with no
            # datasets is reported cleanly instead of failing mid-run
            if not dataset_ids:
                raise RuntimeError("no datasets found in %s" % hklin)

            dataset_id = dataset_ids[0]

            self.add_command_line("hklin1")
            self.add_command_line(hklin)
            self.start()

            if self._pname and self._xname and self._dname:
                self.input(
                    "drename file_number 1 %d %s %s"
                    % (dataset_id, self._xname, self._dname)
                )
                self.input("dpname file_number 1 %d %s" % (dataset_id, self._pname))

            labin_command = "labin file_number 1"
            for number, column in enumerate(column_names, 1):
                labin_command += " E%d=%s" % (number, column)

            self.input(labin_command)

            if self._new_cell_parameters:
                a, b, c, alpha, beta, gamma = self._new_cell_parameters
                self.input(
                    "dcell file_number 1 %d %f %f %f %f %f %f"
                    % (dataset_id, a, b, c, alpha, beta, gamma)
                )

            if self._new_column_suffix:
                suffix = self._new_column_suffix
                labout_command = "labout file_number 1"
                for number, column in enumerate(column_names, 1):
                    labout_command += " E%d=%s_%s" % (number, column, suffix)

                self.input(labout_command)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()

            except RuntimeError as e:
                # something went wrong; remove the output file. Only
                # filesystem errors (e.g. the file was never written) are
                # ignored here.
                try:
                    os.remove(self.get_hklout())
                except OSError:
                    pass
                # re-raised by name rather than with a bare raise, so the
                # inner try/except cannot change which error propagates
                raise e

            return self.get_ccp4_status()