def _prepare_pointless_hklin(working_directory, hklin, phi_width):
    '''Work out which reflection file should be passed to pointless.

    Unless microcrystal or small molecule mode is flagged, hklin is first
    passed through remove_blank and the result is used from then on. If
    the batch range multiplied by phi_width then reaches 180 degrees (and
    small molecule mode is not flagged) a rebatch job limits the data to
    the first 180 degrees and the rebatched file name is returned;
    otherwise the current hklin is returned as-is.

    Every intermediate file created here is registered with FileHandler
    as a temporary file.'''

    def _derived_name(source, template):
        # file name of source minus its last four characters (nominally
        # ".mtz"), substituted into template, inside working_directory
        stem = os.path.split(source)[-1][:-4]
        return os.path.join(working_directory, template % stem)

    # also remove blank images?
    if not Flags.get_microcrystal() and not Flags.get_small_molecule():
        Debug.write('Excluding blank images')

        noblank = _derived_name(hklin, '%s_noblank.mtz')
        FileHandler.record_temporary_file(noblank)

        hklin = remove_blank(hklin, noblank)

    # inspect the (possibly filtered) file to find the batch range
    md = Mtzdump()
    md.set_working_directory(working_directory)
    auto_logfiler(md)
    md.set_hklin(hklin)
    md.dump()

    batch_numbers = md.get_batches()
    batch_span = max(batch_numbers) - min(batch_numbers)

    phi_limit = 180

    if batch_span * phi_width < phi_limit or Flags.get_small_molecule():
        return hklin

    prepointless = _derived_name(hklin, '%s_prepointless.mtz')

    rb = Rebatch()
    rb.set_working_directory(working_directory)
    auto_logfiler(rb)
    rb.set_hklin(hklin)
    rb.set_hklout(prepointless)

    first = min(batch_numbers)
    last = first + int(phi_limit / phi_width)

    Debug.write('Preparing data for pointless - %d batches (%d degrees)' % \
                ((last - first), phi_limit))

    rb.limit_batches(first, last)

    # the truncated file is only an intermediate, so register it as
    # temporary
    FileHandler.record_temporary_file(prepointless)

    return prepointless
def copyfree(self):
    '''Copy the free column from the freein file into hklin, giving hklout.

    Exactly one hklin file must be defined, together with freein and the
    name of the free column within it. The resolution range of hklin, as
    reported by Mtzdump, is passed as the resolution keyword for the
    freein file.

    Returns the value of self.get_ccp4_status().

    Raises RuntimeError if the configuration is incomplete or if the
    error checks run after the program finishes fail; in the latter
    case an attempt is made to remove the output file first.'''

    if not self._hklin_files:
        raise RuntimeError('no hklin files defined')

    if len(self._hklin_files) > 1:
        raise RuntimeError('can have only one hklin to update')

    target = self._hklin_files[0]

    # the resolution range of the target file is used as the limit for
    # the FreeR column
    dumper = Mtzdump()
    dumper.set_working_directory(self.get_working_directory())
    dumper.set_hklin(target)
    dumper.dump()
    limits = dumper.get_resolution_range()

    self.check_hklout()

    if self._freein is None:
        raise RuntimeError('freein not defined')

    if self._freein_column is None:
        raise RuntimeError('freein column not defined')

    # file number 1 is the free set source, file number 2 the target
    for token in ('hklin1', self._freein, 'hklin2', target):
        self.add_command_line(token)

    self.start()

    self.input('labin file_number 1 E1=%s' % self._freein_column)
    self.input('resolution file_number 1 %f %f' % limits)
    self.input('labin file_number 2 all')

    self.close_wait()

    try:
        self.check_for_errors()
        self.check_ccp4_errors()

    except RuntimeError as failure:
        # the run failed, so try to discard whatever output was written;
        # a failure to remove it is deliberately ignored
        try:
            os.remove(self.get_hklout())
        except Exception:
            pass

        raise failure

    return self.get_ccp4_status()
def FindFreeFlag(hklin):
    """Try to find the FREE column in hklin.

    The column called "FreeR_flag" is preferred. Failing that, any column
    whose name contains "free" (case-insensitive) is a candidate, and
    exactly one candidate must exist.

    Args:
        hklin: reflection file, passed to Mtzdump.set_hklin.

    Returns:
        The name of the free column.

    Raises:
        RuntimeError: if no candidate column is found, if more than one
            candidate is found, or if the chosen column is not of
            type "I".
    """

    # get the information we need here...
    mtzdump = Mtzdump()
    mtzdump.set_hklin(hklin)
    mtzdump.dump()

    # map column name -> column type
    column_types = {c[0]: c[1] for c in mtzdump.get_columns()}

    if "FreeR_flag" in column_types:
        if column_types["FreeR_flag"] != "I":
            raise RuntimeError("FreeR_flag column found: type not I")
        return "FreeR_flag"

    # ok, so the usual one wasn't there, look for anything with "free"
    # in it...
    possibilities = [c for c in column_types if "free" in c.lower()]

    if not possibilities:
        raise RuntimeError("no candidate FreeR_flag columns found")

    # an ambiguous match must be an error rather than an implicit None
    if len(possibilities) > 1:
        raise RuntimeError("Multiple candidate FreeR_flag columns found")

    candidate = possibilities[0]
    if column_types[candidate] != "I":
        raise RuntimeError(
            "FreeR_flag column found (%s): type not I" % candidate
        )

    return candidate
def find_columns(self):
    '''Identify columns to use with scaleit.

    Runs Mtzdump on the input file and walks its column list. Each
    column of type 'F' whose name starts with 'F' (the text before the
    first underscore) opens a group: that column plus the following
    three columns when self._anomalous is set (nominally F, SIGF, DANO,
    SIGDANO), or plus the following one column otherwise (F, SIGF).
    The columns are assumed to appear in that order; their types are
    not verified.

    NOTE(review): the original comments mention checking that the file
    is merged, but no such check is performed here.

    The list of selected column names is stored in self._columns and
    returned.

    Raises IndexError if a group starts too close to the end of the
    column list for its companion columns to exist.'''

    self.check_hklin()

    md = Mtzdump()
    md.set_hklin(self.get_hklin())
    md.dump()

    column_info = md.get_columns()

    columns = []

    j = 0
    while j < len(column_info):
        name = column_info[j][0]
        column_type = column_info[j][1]

        if column_type == 'F' and name.split('_')[0] == 'F':
            # take the whole group in one go and step past it
            width = 4 if self._anomalous else 2
            columns.extend(column_info[j + k][0] for k in range(width))
            j += width
        else:
            j += 1

    # ok that should be all of the groups identified
    self._columns = columns

    return columns
def FindFreeFlag(hklin):
    '''Locate the free set column in the reflection file hklin.

    The conventional name 'FreeR_flag' wins if present; otherwise the
    single column whose name contains "free" (in any case) is used. The
    selected column must have type 'I'.

    Returns the column name. Raises RuntimeError when there is no
    candidate, when there are several candidates, or when the selected
    column has the wrong type.'''

    dumper = Mtzdump()
    dumper.set_hklin(hklin)
    dumper.dump()

    # column name -> column type
    type_of = {entry[0]: entry[1] for entry in dumper.get_columns()}

    conventional = 'FreeR_flag'

    if conventional in type_of:
        if type_of[conventional] != 'I':
            raise RuntimeError('FreeR_flag column found: type not I')
        return conventional

    # the usual name is absent, so fall back to anything mentioning
    # "free"
    candidates = [label for label in type_of if 'free' in label.lower()]

    if not candidates:
        raise RuntimeError('no candidate FreeR_flag columns found')

    if len(candidates) > 1:
        raise RuntimeError('Multiple candidate FreeR_flag columns found')

    chosen = candidates[0]

    if type_of[chosen] != 'I':
        raise RuntimeError('FreeR_flag column found (%s): type not I' % \
                           chosen)

    return chosen
def FindFreeFlag(hklin):
    '''Try to find the FREE column in hklin.

    The column named 'FreeR_flag' is used when present. Otherwise every
    column whose name contains "free" (case-insensitive) is a candidate
    and exactly one must exist. The chosen column must be of type 'I'.

    Returns the name of the column.

    Raises RuntimeError if no column is found, if more than one
    candidate is found, or if the chosen column is not of type 'I'.'''

    # get the information we need here...
    mtzdump = Mtzdump()
    mtzdump.set_hklin(hklin)
    mtzdump.dump()
    columns = mtzdump.get_columns()

    # column name -> column type
    ctypes = {}

    for c in columns:
        ctypes[c[0]] = c[1]

    # exceptions are raised with the call form throughout: the
    # "raise Class, message" statement is not valid Python 3, while the
    # call form works on both Python 2 and Python 3

    if 'FreeR_flag' in ctypes:
        if ctypes['FreeR_flag'] != 'I':
            raise RuntimeError('FreeR_flag column found: type not I')
        return 'FreeR_flag'

    # ok, so the usual one wasn't there, look for anything with "free"
    # in it...
    possibilities = [c for c in ctypes if 'free' in c.lower()]

    if not possibilities:
        raise RuntimeError('no candidate FreeR_flag columns found')

    if len(possibilities) == 1:
        if ctypes[possibilities[0]] != 'I':
            raise RuntimeError('FreeR_flag column found (%s): type not I' % \
                               possibilities[0])
        return possibilities[0]

    raise RuntimeError('Multiple candidate FreeR_flag columns found')
def update(self):
    '''Update the information for one reflection file.

    Exactly one hklin file must be defined. Every column of that file
    other than H, K and L is passed through, and the first dataset
    reported by Mtzdump is modified according to whichever of these
    have been set:

      - self._pname, self._xname and self._dname (all three needed):
        rename the dataset and its project;
      - self._new_cell_parameters: replace the cell (a, b, c, alpha,
        beta, gamma);
      - self._new_column_suffix: write each column as <name>_<suffix>.

    Returns the value of self.get_ccp4_status().

    Raises RuntimeError if the set-up is invalid, if the file reports
    no datasets, or if the error checks run after the program finishes
    fail; in the latter case an attempt is made to remove the output
    file before re-raising.'''

    if not self._hklin_files:
        raise RuntimeError('no hklin files defined')

    if len(self._hklin_files) > 1:
        raise RuntimeError('can have only one hklin to update')

    hklin = self._hklin_files[0]

    self.check_hklout()

    md = Mtzdump()
    md.set_hklin(hklin)
    md.dump()

    # everything except the Miller indices is carried across
    column_names = [c[0] for c in md.get_columns()
                    if c[0] not in ('H', 'K', 'L')]

    # The dataset is identified by its number, not by
    # pname/xname/dname, as the latter gets confused if the xname is
    # a number (FIXME 03/NOV/06). The name is therefore never parsed,
    # so a name without exactly three '/'-separated parts is fine.
    dataset_ids = [md.get_dataset_info(d)['id'] for d in md.get_datasets()]

    # fail before the program is started rather than with an IndexError
    # part way through feeding it input
    if not dataset_ids:
        raise RuntimeError('no datasets found in %s' % hklin)

    # FIXME perhaps - ASSERT that we want only the information from
    # the first dataset here...
    dataset_id = dataset_ids[0]

    self.add_command_line('hklin1')
    self.add_command_line(hklin)
    self.start()

    if self._pname and self._xname and self._dname:
        self.input('drename file_number 1 %d %s %s' % \
                   (dataset_id, self._xname, self._dname))
        self.input('dpname file_number 1 %d %s' % \
                   (dataset_id, self._pname))

    labin_command = 'labin file_number 1' + ''.join(
        ' E%d=%s' % (number, column)
        for number, column in enumerate(column_names, 1))

    self.input(labin_command)

    if self._new_cell_parameters:
        a, b, c, alpha, beta, gamma = self._new_cell_parameters
        self.input('dcell file_number 1 %d %f %f %f %f %f %f' % \
                   (dataset_id, a, b, c, alpha, beta, gamma))

    if self._new_column_suffix:
        suffix = self._new_column_suffix
        labout_command = 'labout file_number 1' + ''.join(
            ' E%d=%s_%s' % (number, column, suffix)
            for number, column in enumerate(column_names, 1))

        self.input(labout_command)

    self.close_wait()

    try:
        self.check_for_errors()
        self.check_ccp4_errors()

    except RuntimeError:
        # something went wrong; remove the output file. This is best
        # effort only - a failure to clean up must not mask the real
        # error.
        try:
            os.remove(self.get_hklout())
        except Exception:
            pass

        # bare raise keeps the original traceback
        raise

    return self.get_ccp4_status()
def merge(self):
    '''Combine all of the defined hklin files into the single hklout.

    Each input is inspected with Mtzdump first: all files must report
    the same spacegroup, and no column name (other than H, K, L, which
    are skipped) may appear in more than one place. Only once every
    file has passed is the program started and given one labin line
    per input file.

    Returns the value of self.get_ccp4_status().

    Raises RuntimeError for a missing input list, mismatched
    spacegroups, duplicate column names, or if the error checks run
    after the program finishes fail; in the latter case an attempt is
    made to remove the output file first.'''

    if not self._hklin_files:
        raise RuntimeError('no hklin files defined')

    self.check_hklout()

    # gather the column names of every file, checking for spacegroup
    # mismatches and name clashes along the way
    reference_spacegroup = None
    claimed_labels = []
    labels_per_file = []

    for hklin in self._hklin_files:
        dumper = Mtzdump()
        dumper.set_working_directory(self.get_working_directory())
        dumper.set_hklin(hklin)
        dumper.dump()

        column_info = dumper.get_columns()
        this_spacegroup = dumper.get_spacegroup()

        if reference_spacegroup is None:
            reference_spacegroup = this_spacegroup

        if this_spacegroup != reference_spacegroup:
            raise RuntimeError('spacegroups do not match')

        labels = []

        for entry in column_info:
            label = entry[0]
            if label in ('H', 'K', 'L'):
                continue
            if label in claimed_labels:
                raise RuntimeError('duplicate column names')
            claimed_labels.append(label)
            labels.append(label)

        labels_per_file.append(labels)

    # the inputs are consistent, so build the command line: hklin1,
    # hklin2, ... in the order given
    for file_number, hklin in enumerate(self._hklin_files, 1):
        self.add_command_line('hklin%d' % file_number)
        self.add_command_line(hklin)

    self.start()

    for file_number, labels in enumerate(labels_per_file, 1):
        assignments = ''.join(
            ' E%d=%s' % (position, label)
            for position, label in enumerate(labels, 1))

        self.input(('labin file_number %d' % file_number) + assignments)

    self.close_wait()

    try:
        self.check_for_errors()
        self.check_ccp4_errors()

    except RuntimeError as failure:
        # the run failed, so try to discard whatever output was written;
        # a failure to remove it is deliberately ignored
        try:
            os.remove(self.get_hklout())
        except Exception:
            pass

        raise failure

    return self.get_ccp4_status()
if __name__ == '__main__':

    # Script entry point (intended to become "autoCHEF"). The stated
    # intent is to inspect the MTZ files named on the command line and
    # run with DOSE if such a column exists, else with BATCH. N.B. the
    # original author notes that this will require a fix above.
    #
    # NOTE(review): only the DOSE detection is visible in this fragment;
    # chef and overall_dmin are created but not used within it.

    chef = Chef(stream = Stdout)

    dose_column = None
    overall_dmin = None

    for argv in sys.argv[1:]:

        # list the column names present in this reflection file
        md = Mtzdump()
        md.set_hklin(argv)
        md.dump()
        columns = [c[0] for c in md.get_columns()]

        # once a dose column has been chosen, every later file is only
        # checked for having that column too
        if dose_column:
            assert(dose_column in columns)
            continue

        if 'DOSE' in columns:

            # the first two values of the column range are compared; a
            # DOSE column where they are equal is not selected
            dose_range = md.get_column_range('DOSE')[:2]
            if dose_range[0] != dose_range[1]:
                dose_column = 'DOSE'
def update(self):
    """Rewrite the metadata of a single reflection file.

    Exactly one hklin file must be defined. All of its columns except
    H, K and L are passed through, and the first dataset reported by
    Mtzdump is altered according to whichever of the following are set:
    the project / crystal / dataset names (all three are required
    together), new cell parameters, and a suffix to append to every
    column name.

    Returns the value of self.get_ccp4_status().

    Raises RuntimeError if the set-up is invalid or if the error checks
    run after the program finishes fail; in the latter case an attempt
    is made to remove the output file first.
    """

    if not self._hklin_files:
        raise RuntimeError("no hklin files defined")

    if len(self._hklin_files) > 1:
        raise RuntimeError("can have only one hklin to update")

    hklin = self._hklin_files[0]

    self.check_hklout()

    dumper = Mtzdump()
    dumper.set_hklin(hklin)
    dumper.dump()

    column_info = dumper.get_columns()
    datasets = dumper.get_datasets()

    # datasets are addressed by their numeric id
    dataset_ids = [dumper.get_dataset_info(d)["id"] for d in datasets]

    # the Miller indices are not listed among the input columns
    labels = [entry[0] for entry in column_info
              if entry[0] not in ("H", "K", "L")]

    for token in ("hklin1", hklin):
        self.add_command_line(token)

    self.start()

    # only the first dataset is considered
    target_id = dataset_ids[0]

    if self._pname and self._xname and self._dname:
        self.input(
            "drename file_number 1 %d %s %s"
            % (target_id, self._xname, self._dname)
        )
        self.input("dpname file_number 1 %d %s" % (target_id, self._pname))

    self.input(
        "labin file_number 1"
        + "".join(
            " E%d=%s" % (position, label)
            for position, label in enumerate(labels, 1)
        )
    )

    if self._new_cell_parameters:
        a, b, c, alpha, beta, gamma = self._new_cell_parameters
        self.input(
            "dcell file_number 1 %d %f %f %f %f %f %f"
            % (target_id, a, b, c, alpha, beta, gamma)
        )

    if self._new_column_suffix:
        suffix = self._new_column_suffix
        self.input(
            "labout file_number 1"
            + "".join(
                " E%d=%s_%s" % (position, label, suffix)
                for position, label in enumerate(labels, 1)
            )
        )

    self.close_wait()

    try:
        self.check_for_errors()
        self.check_ccp4_errors()

    except RuntimeError as failure:
        # the run failed, so try to discard whatever output was written;
        # a failure to remove it is deliberately ignored
        try:
            os.remove(self.get_hklout())
        except Exception:
            pass

        raise failure

    return self.get_ccp4_status()