def tst_copy(self):
    """Test shallow copy, deep copy and .copy() semantics of reflection tables."""
    import copy
    from dials.array_family import flex

    # Create a table with a single column
    table = flex.reflection_table([('col1', flex.int(list(range(10))))])

    # A shallow copy shares the underlying column store, so a column added
    # to the copy is also visible through the original table.
    shallow = copy.copy(table)
    shallow['col2'] = flex.double(list(range(10)))
    assert table.ncols() == 2
    assert table.is_consistent()
    print('OK')

    # A deep copy is fully independent of the original.
    deep = copy.deepcopy(table)
    deep['col3'] = flex.std_string(10)
    assert table.ncols() == 2
    assert deep.ncols() == 3
    assert table.is_consistent()
    assert deep.is_consistent()

    # table.copy() behaves like a deep copy.
    table2 = table.copy()
    table2['col3'] = flex.std_string(10)
    assert table.ncols() == 2
    assert table2.ncols() == 3
    assert table.is_consistent()
    assert table2.is_consistent()
    print('OK')
def tst_select(self):
    """Test selecting columns (tuple or flex.std_string) and rows
    (index array or boolean mask) from a reflection table."""
    from dials.array_family import flex

    # The columns as lists
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table = flex.reflection_table()
    table['col1'] = flex.int(c1)
    table['col2'] = flex.double(c2)
    table['col3'] = flex.std_string(c3)

    # Select some columns by python tuple
    new_table = table.select(('col1', 'col2'))
    assert new_table.nrows() == 10
    assert new_table.ncols() == 2
    assert all(a == b for a, b in zip(new_table['col1'], c1))
    assert all(a == b for a, b in zip(new_table['col2'], c2))
    print('OK')

    # Select some columns by flex string array
    new_table = table.select(flex.std_string(['col1', 'col2']))
    assert new_table.nrows() == 10
    assert new_table.ncols() == 2
    assert all(a == b for a, b in zip(new_table['col1'], c1))
    assert all(a == b for a, b in zip(new_table['col2'], c2))
    print('OK')

    # Select some rows by index array
    index = flex.size_t([0, 1, 5, 8, 9])
    cc1 = [c1[i] for i in index]
    cc2 = [c2[i] for i in index]
    cc3 = [c3[i] for i in index]
    new_table = table.select(index)
    assert new_table.nrows() == 5
    assert new_table.ncols() == 3
    assert all(a == b for a, b in zip(new_table['col1'], cc1))
    assert all(a == b for a, b in zip(new_table['col2'], cc2))
    assert all(a == b for a, b in zip(new_table['col3'], cc3))
    print('OK')

    # Select the same rows by boolean mask (True at rows 0, 1, 5, 8, 9,
    # so the expected values cc1/cc2/cc3 from above still apply).
    index = flex.bool([True, True, False, False, False,
                       True, False, False, True, True])
    new_table = table.select(index)
    assert new_table.nrows() == 5
    assert new_table.ncols() == 3
    assert all(a == b for a, b in zip(new_table['col1'], cc1))
    assert all(a == b for a, b in zip(new_table['col2'], cc2))
    assert all(a == b for a, b in zip(new_table['col3'], cc3))
    print('OK')
def tst_updating(self):
    """Test reflection_table.update(): merging columns from another table
    and rejecting tables whose row count does not match."""
    from dials.array_family import flex

    # The columns as lists
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table0 = flex.reflection_table()
    table1 = flex.reflection_table()
    table2 = flex.reflection_table()
    table1['col1'] = flex.int(c1)
    table1['col2'] = flex.double(c2)
    table2['col3'] = flex.std_string(c3)

    # Update from zero columns
    table0.update(table1)
    assert table0.is_consistent()
    assert table0.nrows() == 10
    assert table0.ncols() == 2
    print('OK')

    # Update table1 with table2 columns; table2 itself is left untouched
    table1.update(table2)
    assert table1.is_consistent()
    assert table1.nrows() == 10
    assert table1.ncols() == 3
    assert table2.is_consistent()
    assert table2.nrows() == 10
    assert table2.ncols() == 1
    print('OK')

    # Update table1 with a table whose row count differs: must raise.
    c3 = ['a', 'b', 'c']
    table2 = flex.reflection_table()
    table2['col3'] = flex.std_string(c3)
    try:
        table1.update(table2)
    except Exception:
        pass
    else:
        # BUG FIX: the original put assert(False) inside the try block, so
        # the AssertionError was swallowed by the except clause and this
        # branch could never fail. Raise from the else clause instead.
        raise AssertionError("update() with mismatched nrows should raise")
    assert table1.is_consistent()
    assert table1.nrows() == 10
    assert table1.ncols() == 3
    assert table2.is_consistent()
    assert table2.nrows() == 3
    assert table2.ncols() == 1
    print('OK')
def tst_serialize(self):
    """Test that a reflection table survives a pickle round trip."""
    from dials.array_family import flex

    # The columns as lists
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table = flex.reflection_table()
    table['col1'] = flex.int(c1)
    table['col2'] = flex.double(c2)
    table['col3'] = flex.std_string(c3)

    # Pickle, then unpickle. cPickle is Python-2-only; the py3 pickle
    # module is C-accelerated and a drop-in replacement here.
    import pickle
    obj = pickle.dumps(table)
    new_table = pickle.loads(obj)
    assert new_table.is_consistent()
    assert new_table.nrows() == 10
    assert new_table.ncols() == 3
    assert all(a == b for a, b in zip(new_table['col1'], c1))
    assert all(a == b for a, b in zip(new_table['col2'], c2))
    assert all(a == b for a, b in zip(new_table['col3'], c3))
    print('OK')
def tst_init(self):
    """Test reflection_table construction: default, sized, from a column
    list, and failure on columns of unequal length."""
    from dials.array_family import flex

    # test default
    table = flex.reflection_table()
    assert table.is_consistent()
    assert table.nrows() == 0
    assert table.ncols() == 0
    assert table.empty()
    print('Ok')

    # test with nrows
    table = flex.reflection_table(10)
    assert table.is_consistent()
    assert table.nrows() == 10
    assert table.ncols() == 0
    assert table.empty()
    print('OK')

    # test with valid columns
    table = flex.reflection_table([
        ('col1', flex.int(10)),
        ('col2', flex.double(10)),
        ('col3', flex.std_string(10))])
    assert table.is_consistent()
    assert table.nrows() == 10
    assert table.ncols() == 3
    assert not table.empty()
    print('OK')

    # test with invalid columns (mismatched lengths): must raise.
    # BUG FIX: the original had "assert(false)" - a NameError (lowercase
    # false) raised inside the try block and swallowed by the except
    # clause, so this branch could never fail. Raise from else instead.
    try:
        table = flex.reflection_table([
            ('col1', flex.int(10)),
            ('col2', flex.double(20)),
            ('col3', flex.std_string(10))])
    except Exception:
        pass
    else:
        raise AssertionError(
            "reflection_table with mismatched column lengths should raise")
    print('OK')
def tst_iteration(self):
    """Test iteration over keys, columns and rows of a reflection table."""
    from dials.array_family import flex

    # The columns as lists
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table = flex.reflection_table()
    table['col1'] = flex.int(c1)
    table['col2'] = flex.double(c2)
    table['col3'] = flex.std_string(c3)

    # Try iterating keys: each column name appears exactly once
    k = []
    for key in table.keys():
        k.append(key)
    assert len(k) == 3
    assert k.count('col1') == 1
    assert k.count('col2') == 1
    assert k.count('col3') == 1
    print('OK')

    # Try iterating (key, column) pairs
    k = []
    c = []
    for key, col in table.cols():
        k.append(key)
        c.append(col)
    assert len(k) == 3
    assert k.count('col1') == 1
    assert k.count('col2') == 1
    assert k.count('col3') == 1
    print('OK')

    # Try iterating rows: each row is a dict-like view of one record
    for row1, row2 in zip(table.rows(), zip(c1, c2, c3)):
        assert row1['col1'] == row2[0]
        assert row1['col2'] == row2[1]
        assert row1['col3'] == row2[2]
    print('OK')
def tst_row_operations(self):
    """Test extend, append and insert row operations, plus per-row
    __getitem__ and __setitem__."""
    from dials.array_family import flex

    # The columns as lists (mirrored alongside the table throughout)
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table = flex.reflection_table()
    table['col1'] = flex.int(c1)
    table['col2'] = flex.double(c2)
    table['col3'] = flex.std_string(c3)

    def check(nrows):
        # Verify the table matches the mirrored python lists exactly.
        # Reads c1/c2/c3 from the enclosing scope at call time, so it
        # always sees their latest values.
        assert table.nrows() == nrows
        assert table.ncols() == 3
        assert table.is_consistent()
        assert all(a == b for a, b in zip(table['col1'], c1))
        assert all(a == b for a, b in zip(table['col2'], c2))
        assert all(a == b for a, b in zip(table['col3'], c3))
        print('OK')

    # Extend the table with itself
    table.extend(table)
    c1 = c1 * 2
    c2 = c2 * 2
    c3 = c3 * 2
    check(20)

    # Append rows; unspecified columns receive default values (0 / '')
    table.append({'col1': 10})
    c1 = c1 + [10]
    c2 = c2 + [0]
    c3 = c3 + ['']
    check(21)

    table.append({'col2': 11})
    c1 = c1 + [0]
    c2 = c2 + [11]
    c3 = c3 + ['']
    check(22)

    table.append({'col1': 12, 'col2': 12, 'col3': 'l'})
    c1 = c1 + [12]
    c2 = c2 + [12]
    c3 = c3 + ['l']
    check(23)

    # Try inserting some rows
    table.insert(5, {'col1': -1})
    c1.insert(5, -1)
    c2.insert(5, 0)
    c3.insert(5, '')
    check(24)

    table.insert(2, {'col1': -2, 'col2': -3, 'col3': 'abc'})
    c1.insert(2, -2)
    c2.insert(2, -3)
    c3.insert(2, 'abc')
    check(25)

    # Try iterating through table rows by index
    for i in range(table.nrows()):
        row = table[i]
        assert row['col1'] == c1[i]
        assert row['col2'] == c2[i]
        assert row['col3'] == c3[i]
    print('OK')

    # Try setting some rows; unspecified columns keep their old values
    table[2] = {'col1': 100}
    assert table[2]['col1'] == 100
    assert table[2]['col2'] == c2[2]
    assert table[2]['col3'] == c3[2]
    table[10] = {'col1': 1000, 'col2': 2000, 'col3': 'hello'}
    assert table[10]['col1'] == 1000
    assert table[10]['col2'] == 2000
    assert table[10]['col3'] == 'hello'
    print('OK')
def test_to_from_msgpack(tmpdir):
    """Round-trip a reflection table through msgpack bytes and a msgpack file."""
    from dials.model.data import Shoebox

    def gen_shoebox():
        # Build a small 1x3x4 shoebox with deterministic data/mask/background.
        shoebox = Shoebox(0, (0, 4, 0, 3, 0, 1))
        shoebox.allocate()
        for k in range(1):
            for j in range(3):
                for i in range(4):
                    shoebox.data[k, j, i] = i + j + k + 0.1
                    shoebox.mask[k, j, i] = i % 2
                    shoebox.background[k, j, i] = i * j + 0.2
        return shoebox

    def compare(a, b):
        # Element-wise comparison of two shoeboxes (floats to 1e-9).
        assert a.is_consistent()
        assert b.is_consistent()
        assert a.panel == b.panel
        assert a.bbox == b.bbox
        for aa, bb in zip(a.data, b.data):
            if abs(aa - bb) > 1e-9:
                return False
        for aa, bb in zip(a.background, b.background):
            if abs(aa - bb) > 1e-9:
                return False
        for aa, bb in zip(a.mask, b.mask):
            if aa != bb:
                return False
        return True

    # The columns as lists, one per supported column type
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ["a", "b", "c", "d", "e", "f", "g", "i", "j", "k"]
    c4 = [True, False, True, False, True] * 2
    c5 = list(range(10))
    c6 = [(i + 1, i + 2) for i in range(10)]
    c7 = [(i + 1, i + 2, i + 3) for i in range(10)]
    c8 = [tuple(i + j for j in range(9)) for i in range(10)]
    c9 = [tuple(i + j for j in range(6)) for i in range(10)]
    c10 = [(i + 1, i + 2, i + 3) for i in range(10)]
    c11 = [gen_shoebox() for i in range(10)]

    # Create a table with some elements
    table = flex.reflection_table()
    table["col1"] = flex.int(c1)
    table["col2"] = flex.double(c2)
    table["col3"] = flex.std_string(c3)
    table["col4"] = flex.bool(c4)
    table["col5"] = flex.size_t(c5)
    table["col6"] = flex.vec2_double(c6)
    table["col7"] = flex.vec3_double(c7)
    table["col8"] = flex.mat3_double(c8)
    table["col9"] = flex.int6(c9)
    table["col10"] = flex.miller_index(c10)
    table["col11"] = flex.shoebox(c11)

    expected = [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10]

    def check(new_table):
        # Verify a round-tripped table against the source data.
        # (The original wrapped each generator in tuple() before all(),
        # which materialized it needlessly; all() consumes it directly.)
        assert new_table.is_consistent()
        assert new_table.nrows() == 10
        assert new_table.ncols() == 11
        for col, values in enumerate(expected, start=1):
            assert all(a == b for a, b in zip(new_table["col%d" % col], values))
        assert all(compare(a, b) for a, b in zip(new_table["col11"], c11))

    # Round trip through an in-memory msgpack blob
    check(flex.reflection_table.from_msgpack(table.as_msgpack()))

    # Round trip through a msgpack file on disk
    mpack_path = tmpdir.join("reflections.mpack").strpath
    table.as_msgpack_file(mpack_path)
    check(flex.reflection_table.from_msgpack_file(mpack_path))
def tst_del_selected(self):
    """Test del_selected for columns (tuple / flex.std_string) and rows
    (index array / boolean flags)."""
    from dials.array_family import flex

    # The columns as lists
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table1 = flex.reflection_table()
    table1['col1'] = flex.int(c1)
    table1['col2'] = flex.double(c2)
    table1['col3'] = flex.std_string(c3)

    # Del selected columns given as a python tuple
    table1.del_selected(('col3', 'col2'))
    assert table1.nrows() == 10
    assert table1.ncols() == 1
    assert "col1" in table1
    assert "col2" not in table1
    assert "col3" not in table1
    assert all(a == b for a, b in zip(table1['col1'], c1))
    print('OK')

    # Del selected columns given as a flex string array
    table1 = flex.reflection_table()
    table1['col1'] = flex.int(c1)
    table1['col2'] = flex.double(c2)
    table1['col3'] = flex.std_string(c3)
    table1.del_selected(flex.std_string(['col3', 'col2']))
    assert table1.nrows() == 10
    assert table1.ncols() == 1
    assert "col1" in table1
    assert "col2" not in table1
    assert "col3" not in table1
    assert all(a == b for a, b in zip(table1['col1'], c1))
    print('OK')

    # Del selected rows by index array
    table1 = flex.reflection_table()
    table1['col1'] = flex.int(c1)
    table1['col2'] = flex.double(c2)
    table1['col3'] = flex.std_string(c3)
    index = flex.size_t([0, 1, 5, 8, 9])
    # range() is not a list in Python 3, so materialize before remove()
    index2 = list(range(10))
    for i in index:
        index2.remove(i)
    ccc1 = [c1[i] for i in index2]
    ccc2 = [c2[i] for i in index2]
    ccc3 = [c3[i] for i in index2]
    table1.del_selected(index)
    assert table1.nrows() == len(ccc1)
    assert all(a == b for a, b in zip(table1['col1'], ccc1))
    assert all(a == b for a, b in zip(table1['col2'], ccc2))
    assert all(a == b for a, b in zip(table1['col3'], ccc3))
    print('OK')

    # Del selected rows by boolean flags (True at rows 0, 1, 5, 8, 9 -
    # the same rows as 'index', so the expected values are unchanged).
    table1 = flex.reflection_table()
    table1['col1'] = flex.int(c1)
    table1['col2'] = flex.double(c2)
    table1['col3'] = flex.std_string(c3)
    flags = flex.bool([True, True, False, False, False,
                       True, False, False, True, True])
    # BUG FIX: the original called del_selected(index) here, leaving the
    # 'flags' array unused and the boolean overload untested.
    table1.del_selected(flags)
    assert table1.nrows() == len(ccc1)
    assert all(a == b for a, b in zip(table1['col1'], ccc1))
    assert all(a == b for a, b in zip(table1['col2'], ccc2))
    assert all(a == b for a, b in zip(table1['col3'], ccc3))
    print('OK')
def tst_slicing(self):
    """Test getting and setting forward and backward slices of a table."""
    from dials.array_family import flex

    # The columns as lists (mirrored alongside the table throughout)
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table = flex.reflection_table()
    table['col1'] = flex.int(c1)
    table['col2'] = flex.double(c2)
    table['col3'] = flex.std_string(c3)

    # Try forward slicing
    new_table = table[2:7:2]
    assert new_table.ncols() == 3
    assert new_table.nrows() == 3
    assert new_table.is_consistent()
    c11 = c1[2:7:2]
    c22 = c2[2:7:2]
    c33 = c3[2:7:2]
    assert all(a == b for a, b in zip(new_table['col1'], c11))
    assert all(a == b for a, b in zip(new_table['col2'], c22))
    assert all(a == b for a, b in zip(new_table['col3'], c33))
    print('OK')

    # Try backward slicing
    new_table = table[7:2:-2]
    assert new_table.ncols() == 3
    assert new_table.nrows() == 3
    assert new_table.is_consistent()
    c11 = c1[7:2:-2]
    c22 = c2[7:2:-2]
    c33 = c3[7:2:-2]
    assert all(a == b for a, b in zip(new_table['col1'], c11))
    assert all(a == b for a, b in zip(new_table['col2'], c22))
    assert all(a == b for a, b in zip(new_table['col3'], c33))
    print('OK')

    # Try setting a forward slice from the sliced table
    table[2:7:2] = new_table
    assert table.ncols() == 3
    assert table.nrows() == 10
    assert table.is_consistent()
    c1[2:7:2] = c11
    c2[2:7:2] = c22
    c3[2:7:2] = c33
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')

    # Try setting a backward slice from the sliced table
    table[7:2:-2] = new_table
    assert table.ncols() == 3
    assert table.nrows() == 10
    assert table.is_consistent()
    c1[7:2:-2] = c11
    c2[7:2:-2] = c22
    c3[7:2:-2] = c33
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')
def run(self, all_experiments, all_reflections):
    """ Load all the data using MPI """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    test = [all_experiments is None, all_reflections is None].count(True)
    assert test in [0, 2]
    if test == 2:
        # Starting from scratch: create empty containers to accumulate into.
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0
    else:
        # Appending to pre-populated containers; remember the starting
        # sizes so we can report only what this call loaded.
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)

    # Generate and send a list of file paths to each worker.
    # Only rank 0 scans the input and partitions the file list.
    if self.mpi_helper.rank == 0:
        file_list = self.get_list()
        self.params.input.path = None  # the input is already parsed
        from xfel.merging.application.input.file_load_calculator import file_load_calculator
        load_calculator = file_load_calculator(self.params, file_list)
        # One sub-list per rank, balanced by estimated load.
        calculated_file_list = load_calculator.calculate_file_load(
            self.mpi_helper.size)
        self.logger.log('Transmitting a list of %d lists of file pairs' % (
            len(calculated_file_list)))
        transmitted = calculated_file_list
    else:
        transmitted = None

    # Broadcast the full partition from rank 0; each rank then picks out
    # its own sub-list by rank index.
    self.logger.log_step_time("BROADCAST_FILE_LIST")
    transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
    new_file_list = transmitted[self.mpi_helper.rank]
    self.logger.log("Received a list of %d file pairs" % len(new_file_list))
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    # NOTE(review): ExperimentListFactory, easy_pickle, create_experiment_identifier,
    # get_memory_usage and data_counter are assumed to be module-level
    # imports - they are not visible in this chunk; confirm at file top.
    self.logger.log_step_time("LOAD")
    for experiments_filename, reflections_filename in new_file_list:
        experiments = ExperimentListFactory.from_json_file(
            experiments_filename, check_format=False)
        reflections = easy_pickle.load(reflections_filename)
        for experiment_id, experiment in enumerate(experiments):
            # Assign a globally unique identifier to each experiment and
            # tag its reflections with it via the 'exp_id' column.
            experiment.identifier = create_experiment_identifier(
                experiment, experiments_filename, experiment_id)
            all_experiments.append(experiment)
            refls = reflections.select(reflections['id'] == experiment_id)
            refls['exp_id'] = flex.std_string(len(refls), experiment.identifier)
            all_reflections.extend(refls)
    self.logger.log_step_time("LOAD", True)
    self.logger.log('Read %d experiments consisting of %d reflections' % (
        len(all_experiments) - starting_expts_count,
        len(all_reflections) - starting_refls_count))
    self.logger.log(get_memory_usage())

    # Count the loaded data
    data_counter(self.params).count(all_experiments, all_reflections)
    return all_experiments, all_reflections
def run(self, all_experiments, all_reflections):
    """ Load all the data using MPI """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    test = [all_experiments is None, all_reflections is None].count(True)
    assert test in [0, 2]
    if test == 2:
        # Starting from scratch: create empty containers to accumulate into.
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0
    else:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    self.logger.log(
        "Initial number of experiments: %d; Initial number of reflections: %d"
        % (starting_expts_count, starting_refls_count))

    # Generate and send a list of file paths to each worker.
    # NOTE(review): os, json, file_load_calculator, ExperimentListFactory,
    # create_experiment_identifier, preGen_experiment_identifiers and
    # get_memory_usage are assumed to be module-level imports - not
    # visible in this chunk; confirm at file top.
    if self.mpi_helper.rank == 0:
        file_list = self.get_list()
        self.logger.log(
            "Built an input list of %d json/pickle file pairs"
            % (len(file_list)))
        self.params.input.path = None  # Rank 0 has already parsed the input parameters
        # optionally write a file list mapping to disk, useful in post
        # processing if save_experiments_and_reflections=True
        file_id_from_names = None
        if self.params.output.expanded_bookkeeping:
            apath = lambda x: os.path.abspath(x)
            # Forward map (file id -> absolute path pair) is dumped to
            # json; the inverse map is broadcast for tagging reflections.
            file_names_from_id = {
                i_f: tuple(map(apath, exp_ref_pair))
                for i_f, exp_ref_pair in enumerate(file_list)
            }
            with open(
                    os.path.join(self.params.output.output_dir,
                                 "file_list_map.json"), "w") as o:
                json.dump(file_names_from_id, o)
            file_id_from_names = {
                tuple(map(apath, exp_ref_pair)): i_f
                for i_f, exp_ref_pair in enumerate(file_list)
            }
        # One sub-list per rank, balanced by estimated load.
        per_rank_file_list = file_load_calculator(
            self.params, file_list, self.logger).calculate_file_load(
                available_rank_count=self.mpi_helper.size)
        self.logger.log(
            'Transmitting a list of %d lists of json/pickle file pairs'
            % (len(per_rank_file_list)))
        transmitted = per_rank_file_list, file_id_from_names
    else:
        transmitted = None

    # Broadcast the partition (and optional file-id mapping) from rank 0;
    # ranks beyond the number of sub-lists get no work (None).
    self.logger.log_step_time("BROADCAST_FILE_LIST")
    new_file_list, file_names_mapping = self.mpi_helper.comm.bcast(
        transmitted, root=0)
    new_file_list = new_file_list[
        self.mpi_helper.rank] if self.mpi_helper.rank < len(
            new_file_list) else None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if new_file_list is not None:
        self.logger.log("Received a list of %d json/pickle file pairs"
                        % len(new_file_list))
        for experiments_filename, reflections_filename in new_file_list:
            self.logger.log("Reading %s %s"
                            % (experiments_filename, reflections_filename))
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename,
                check_format=self.params.input.read_image_headers)
            reflections = flex.reflection_table.from_file(
                reflections_filename)
            if self.params.output.expanded_bookkeeping:
                # NOTE: these are un-prunable
                reflections["input_refl_index"] = flex.int(
                    list(range(len(reflections))))
                reflections["orig_exp_id"] = reflections['id']
                assert file_names_mapping is not None
                exp_ref_pair = os.path.abspath(
                    experiments_filename), os.path.abspath(
                        reflections_filename)
                this_refl_fileMappings = [
                    file_names_mapping[exp_ref_pair]
                ] * len(reflections)
                reflections["file_list_mapping"] = flex.int(
                    this_refl_fileMappings)
            self.logger.log("Data read, prepping")

            # Preserve the raw summation intensities before any downstream
            # step modifies them (* 1 forces a copy of the column).
            if 'intensity.sum.value' in reflections:
                reflections['intensity.sum.value.unmodified'] = reflections[
                    'intensity.sum.value'] * 1
            if 'intensity.sum.variance' in reflections:
                reflections[
                    'intensity.sum.variance.unmodified'] = reflections[
                        'intensity.sum.variance'] * 1

            # Re-number experiment ids locally; -1 marks "not yet assigned"
            # so we can verify every reflection is accounted for below.
            new_ids = flex.int(len(reflections), -1)
            new_identifiers = flex.std_string(len(reflections))
            eid = reflections.experiment_identifiers()
            # Drop any identifier mapping carried in from the input file.
            for k in eid.keys():
                del eid[k]

            if self.params.output.expanded_bookkeeping:
                preGen_experiment_identifiers(experiments,
                                              experiments_filename)
            for experiment_id, experiment in enumerate(experiments):
                # select reflections of the current experiment
                refls_sel = reflections['id'] == experiment_id
                # Skip experiments that contributed no reflections.
                if refls_sel.count(True) == 0:
                    continue
                if experiment.identifier is None or len(
                        experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)
                if not self.params.input.keep_imagesets:
                    experiment.imageset = None
                all_experiments.append(experiment)
                # Reflection experiment 'id' is unique within this rank;
                # 'exp_id' (i.e. experiment identifier) is unique globally
                new_identifiers.set_selected(refls_sel,
                                             experiment.identifier)
                new_id = len(all_experiments) - 1
                eid[new_id] = experiment.identifier
                new_ids.set_selected(refls_sel, new_id)
            assert (new_ids < 0).count(
                True) == 0, "Not all reflections accounted for"
            reflections['id'] = new_ids
            reflections['exp_id'] = new_identifiers
            all_reflections.extend(reflections)
    else:
        self.logger.log("Received a list of 0 json/pickle file pairs")
    self.logger.log_step_time("LOAD", True)
    self.logger.log('Read %d experiments consisting of %d reflections' % (
        len(all_experiments) - starting_expts_count,
        len(all_reflections) - starting_refls_count))
    self.logger.log("Memory usage: %d MB" % get_memory_usage())
    # Drop reflection columns not needed downstream to save memory.
    all_reflections = self.prune_reflection_table_keys(all_reflections)

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)
    return all_experiments, all_reflections
def batch_plot_shapes_and_annotations(self):
    """Build plotly shape/annotation dicts for alternating batch bands.

    Returns a tuple (shapes, annotations, text): one shaded rectangle and
    one centred label per batch, plus a per-entry hover text labelling
    each batch member as "<batch id>: <position within batch>".
    """
    palette = ("#d3d3d3", "#808080")  # light grey / grey, alternated per batch
    shapes = []
    annotations = []
    batches = flex.int(self.batches)
    text = flex.std_string(batches.size())
    for i, batch in enumerate(self.batch_params):
        start = self._batch_increments[i]
        width = batch["range"][1] - batch["range"][0]
        shapes.append({
            "type": "rect",
            # x-reference is assigned to the x-values
            "xref": "x",
            # y-reference is assigned to the plot paper [0,1]
            "yref": "paper",
            "x0": start,
            "y0": 0,
            "x1": start + width,
            "y1": 1,
            "fillcolor": palette[i % 2],
            "opacity": 0.2,
            "line": {"width": 0},
        })
        annotations.append({
            # x-reference is assigned to the x-values
            "xref": "x",
            # y-reference is assigned to the plot paper [0,1]
            "yref": "paper",
            "x": start + width / 2,
            "y": 1,
            "text": f"{batch['id']}",
            "showarrow": False,
            "yshift": 20,
        })
        in_batch = (batches >= batch["range"][0]) & (batches <= batch["range"][1])
        text.set_selected(
            in_batch,
            flex.std_string([
                f"{batch['id']}: {j - batch['range'][0] + 1}"
                for j in batches.select(in_batch)
            ]),
        )
    return shapes, annotations, list(text)
def run_once(directory):
    """Collect indexing/integration statistics for a single sweep directory.

    Fixes Python-2-only syntax: print statements and the
    "except Exception, e" form, which are SyntaxErrors under Python 3.
    """
    from dxtbx.serialize import load
    sweep_dir = os.path.basename(directory)
    print(sweep_dir)

    datablock_name = os.path.join(directory, "datablock.json")
    if not os.path.exists(datablock_name):
        # this is what xia2 calls it:
        datablock_name = os.path.join(directory, "datablock_import.json")
    strong_spots_name = os.path.join(directory, "strong.pickle")
    experiments_name = os.path.join(directory, "experiments.json")
    indexed_spots_name = os.path.join(directory, "indexed.pickle")
    unindexed_spots_name = os.path.join(directory, "unindexed.pickle")
    # Nothing to report without a datablock and strong spots.
    if not (os.path.exists(datablock_name)
            and os.path.exists(strong_spots_name)):
        return
    datablock = load.datablock(datablock_name)
    assert len(datablock) == 1
    if len(datablock[0].extract_sweeps()) == 0:
        print("Skipping %s" % directory)
        return
    sweep = datablock[0].extract_sweeps()[0]
    template = sweep.get_template()
    strong_spots = easy_pickle.load(strong_spots_name)
    n_strong_spots = len(strong_spots)
    if os.path.exists(experiments_name):
        experiments = load.experiment_list(experiments_name)
        n_indexed_lattices = len(experiments)
    else:
        experiments = None
        n_indexed_lattices = 0
    g = glob.glob(os.path.join(directory, "xds*", "run_2", "INTEGRATE.HKL"))
    n_integrated_lattices = len(g)
    if os.path.exists(indexed_spots_name):
        indexed_spots = easy_pickle.load(indexed_spots_name)
    else:
        indexed_spots = None
    # Merge any per-lattice indexed_*.pickle files into one table.
    # NOTE(review): the original (whitespace-mangled) source is ambiguous
    # about whether this block sat inside the else above - confirm.
    g = glob.glob(os.path.join(directory, "indexed_*.pickle"))
    if len(g):
        for path in g:
            if indexed_spots is None:
                indexed_spots = easy_pickle.load(path)
            else:
                indexed_spots.extend(easy_pickle.load(path))
    if os.path.exists(unindexed_spots_name):
        unindexed_spots = easy_pickle.load(unindexed_spots_name)
        n_unindexed_spots = len(unindexed_spots)
    else:
        n_unindexed_spots = 0

    # calculate estimated d_min for sweep based on 95th percentile
    from dials.algorithms.indexing import indexer
    detector = sweep.get_detector()
    scan = sweep.get_scan()
    beam = sweep.get_beam()
    goniometer = sweep.get_goniometer()
    if len(strong_spots) == 0:
        d_strong_spots_99th_percentile = 0
        d_strong_spots_95th_percentile = 0
        d_strong_spots_50th_percentile = 0
        n_strong_spots_dmin_4 = 0
    else:
        # Map spots to reciprocal space and take d-spacing percentiles
        # from the descending-sorted list.
        spots_mm = indexer.indexer_base.map_spots_pixel_to_mm_rad(
            strong_spots, detector, scan)
        indexer.indexer_base.map_centroids_to_reciprocal_space(
            spots_mm, detector, beam, goniometer)
        d_spacings = 1 / spots_mm['rlp'].norms()
        perm = flex.sort_permutation(d_spacings, reverse=True)
        d_spacings_sorted = d_spacings.select(perm)
        percentile_99th = int(math.floor(0.99 * len(d_spacings)))
        percentile_95th = int(math.floor(0.95 * len(d_spacings)))
        percentile_50th = int(math.floor(0.5 * len(d_spacings)))
        d_strong_spots_99th_percentile = d_spacings_sorted[percentile_99th]
        d_strong_spots_95th_percentile = d_spacings_sorted[percentile_95th]
        d_strong_spots_50th_percentile = d_spacings_sorted[percentile_50th]
        n_strong_spots_dmin_4 = (d_spacings >= 4).count(True)

    # Per-lattice statistics for each indexed experiment.
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()
    sweep_dir_cryst = flex.std_string()
    if experiments is not None:
        for i, experiment in enumerate(experiments):
            sweep_dir_cryst.append(sweep_dir)
            crystal_model = experiment.crystal
            unit_cell = crystal_model.get_unit_cell()
            space_group = crystal_model.get_space_group()
            crystal_symmetry = crystal.symmetry(
                unit_cell=unit_cell, space_group=space_group)
            # Report unit cells in the reference setting for comparability.
            cb_op_reference_setting = (
                crystal_symmetry.change_of_basis_op_to_reference_setting())
            crystal_symmetry_reference_setting = crystal_symmetry.change_basis(
                cb_op_reference_setting)
            cell_params.append(
                crystal_symmetry_reference_setting.unit_cell().parameters())
            spots_mm = indexed_spots.select(indexed_spots['id'] == i)
            n_indexed.append(len(spots_mm))
            if len(spots_mm) == 0:
                d_min_indexed.append(0)
            else:
                indexer.indexer_base.map_centroids_to_reciprocal_space(
                    spots_mm, detector, beam, goniometer)
                d_spacings = 1 / spots_mm['rlp'].norms()
                perm = flex.sort_permutation(d_spacings, reverse=True)
                d_min_indexed.append(d_spacings[perm[-1]])
            try:
                rmsds.append(get_rmsds_obs_pred(spots_mm, experiment))
            except Exception as e:
                # Record a sentinel rather than aborting the whole sweep.
                print(e)
                rmsds.append((-1, -1, -1))
                continue
def test_del_selected():
    """Check del_selected for column tuples, flex string arrays and row indices."""
    # Reference column data
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ["a", "b", "c", "d", "e", "f", "g", "i", "j", "k"]

    def make_table():
        # Build a fresh three-column table from the reference data.
        t = flex.reflection_table()
        t["col1"] = flex.int(c1)
        t["col2"] = flex.double(c2)
        t["col3"] = flex.std_string(c3)
        return t

    def check_only_col1(t):
        # After column deletion only col1 survives, untouched.
        assert t.nrows() == 10
        assert t.ncols() == 1
        assert "col1" in t
        assert "col2" not in t
        assert "col3" not in t
        assert list(t["col1"]) == c1

    def check_rows_deleted(t, e1, e2, e3):
        # After row deletion the surviving rows match the expected lists.
        assert t.nrows() == len(e1)
        assert list(t["col1"]) == e1
        assert list(t["col2"]) == e2
        assert list(t["col3"]) == e3

    # Del selected columns given as a python tuple
    table1 = make_table()
    table1.del_selected(("col3", "col2"))
    check_only_col1(table1)

    # Del selected columns given as a flex string array
    table1 = make_table()
    table1.del_selected(flex.std_string(["col3", "col2"]))
    check_only_col1(table1)

    # Del selected rows by index array
    index = flex.size_t([0, 1, 5, 8, 9])
    removed = set(index)
    keep = [i for i in range(10) if i not in removed]
    ccc1 = [c1[i] for i in keep]
    ccc2 = [c2[i] for i in keep]
    ccc3 = [c3[i] for i in keep]
    table1 = make_table()
    table1.del_selected(index)
    check_rows_deleted(table1, ccc1, ccc2, ccc3)

    # Del selected rows again on a fresh table
    table1 = make_table()
    table1.del_selected(index)
    check_rows_deleted(table1, ccc1, ccc2, ccc3)
def tst_del_selected(self): from dials.array_family import flex # The columns as lists c1 = list(range(10)) c2 = list(range(10)) c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k'] # Create a table with some elements table1 = flex.reflection_table() table1['col1'] = flex.int(c1) table1['col2'] = flex.double(c2) table1['col3'] = flex.std_string(c3) # Del selected columns table1.del_selected(('col3', 'col2')) assert(table1.nrows() == 10) assert(table1.ncols() == 1) assert("col1" in table1) assert("col2" not in table1) assert("col3" not in table1) assert(all(a == b for a, b in zip(table1['col1'], c1))) print 'OK' # Del selected columns table1 = flex.reflection_table() table1['col1'] = flex.int(c1) table1['col2'] = flex.double(c2) table1['col3'] = flex.std_string(c3) table1.del_selected(flex.std_string(['col3', 'col2'])) assert(table1.nrows() == 10) assert(table1.ncols() == 1) assert("col1" in table1) assert("col2" not in table1) assert("col3" not in table1) assert(all(a == b for a, b in zip(table1['col1'], c1))) print 'OK' # Del selected rows table1 = flex.reflection_table() table1['col1'] = flex.int(c1) table1['col2'] = flex.double(c2) table1['col3'] = flex.std_string(c3) index = flex.size_t([0, 1, 5, 8, 9]) index2 = range(10) for i in index: index2.remove(i) ccc1 = [c1[i] for i in index2] ccc2 = [c2[i] for i in index2] ccc3 = [c3[i] for i in index2] table1.del_selected(index) assert(table1.nrows() == len(ccc1)) assert(all(a == b for a, b in zip(table1['col1'], ccc1))) assert(all(a == b for a, b in zip(table1['col2'], ccc2))) assert(all(a == b for a, b in zip(table1['col3'], ccc3))) print 'OK' # Del selected rows table1 = flex.reflection_table() table1['col1'] = flex.int(c1) table1['col2'] = flex.double(c2) table1['col3'] = flex.std_string(c3) flags = flex.bool([True, True, False, False, False, True, False, False, True, True]) table1.del_selected(index) assert(table1.nrows() == len(ccc1)) assert(all(a == b for a, b in zip(table1['col1'], ccc1))) assert(all(a == b 
for a, b in zip(table1['col2'], ccc2))) assert(all(a == b for a, b in zip(table1['col3'], ccc3))) print 'OK'
def tst_set_selected(self): from dials.array_family import flex from copy import deepcopy # The columns as lists c1 = list(range(10)) c2 = list(range(10)) c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k'] # Create a table with some elements table1 = flex.reflection_table() table2 = flex.reflection_table() table1['col1'] = flex.int(c1) table2['col2'] = flex.double(c2) table2['col3'] = flex.std_string(c3) # Set selected columns table1.set_selected(('col3', 'col2'), table2) assert(table1.nrows() == 10) assert(table1.ncols() == 3) assert(all(a == b for a, b in zip(table1['col1'], c1))) assert(all(a == b for a, b in zip(table1['col2'], c2))) assert(all(a == b for a, b in zip(table1['col3'], c3))) print 'OK' # Set selected columns table1 = flex.reflection_table() table1['col1'] = flex.int(c1) table1.set_selected(flex.std_string(['col3', 'col2']), table2) assert(table1.nrows() == 10) assert(table1.ncols() == 3) assert(all(a == b for a, b in zip(table1['col1'], c1))) assert(all(a == b for a, b in zip(table1['col2'], c2))) assert(all(a == b for a, b in zip(table1['col3'], c3))) print 'OK' cc1 = list(range(10, 15)) cc2 = list(range(10, 15)) cc3 = ['l', 'm', 'n', 'o', 'p'] # Set selected rows table2 = flex.reflection_table() table2['col1'] = flex.int(cc1) table2['col2'] = flex.double(cc2) table2['col3'] = flex.std_string(cc3) index = flex.size_t([0, 1, 5, 8, 9]) ccc1 = deepcopy(c1) ccc2 = deepcopy(c2) ccc3 = deepcopy(c3) for j, i in enumerate(index): ccc1[i] = cc1[j] ccc2[i] = cc2[j] ccc3[i] = cc3[j] table1.set_selected(index, table2) assert(all(a == b for a, b in zip(table1['col1'], ccc1))) assert(all(a == b for a, b in zip(table1['col2'], ccc2))) assert(all(a == b for a, b in zip(table1['col3'], ccc3))) print 'OK' # Set selected rows table2 = flex.reflection_table() table2['col1'] = flex.int(cc1) table2['col2'] = flex.double(cc2) table2['col3'] = flex.std_string(cc3) flags = flex.bool([True, True, False, False, False, True, False, False, True, True]) 
table1.set_selected(index, table2) assert(all(a == b for a, b in zip(table1['col1'], ccc1))) assert(all(a == b for a, b in zip(table1['col2'], ccc2))) assert(all(a == b for a, b in zip(table1['col3'], ccc3))) print 'OK'
def tst_slicing(self): from dials.array_family import flex # The columns as lists c1 = list(range(10)) c2 = list(range(10)) c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k'] # Create a table with some elements table = flex.reflection_table() table['col1'] = flex.int(c1) table['col2'] = flex.double(c2) table['col3'] = flex.std_string(c3) # Try forward slicing new_table = table[2:7:2] assert(new_table.ncols() == 3) assert(new_table.nrows() == 3) assert(new_table.is_consistent()) c11 = c1[2:7:2] c22 = c2[2:7:2] c33 = c3[2:7:2] assert(all(a == b for a, b in zip(new_table['col1'], c11))) assert(all(a == b for a, b in zip(new_table['col2'], c22))) assert(all(a == b for a, b in zip(new_table['col3'], c33))) print 'OK' # Try backward slicing new_table = table[7:2:-2] assert(new_table.ncols() == 3) assert(new_table.nrows() == 3) assert(new_table.is_consistent()) c11 = c1[7:2:-2] c22 = c2[7:2:-2] c33 = c3[7:2:-2] assert(all(a == b for a, b in zip(new_table['col1'], c11))) assert(all(a == b for a, b in zip(new_table['col2'], c22))) assert(all(a == b for a, b in zip(new_table['col3'], c33))) print 'OK' # Try setting forward slicing table[2:7:2] = new_table assert(table.ncols() == 3) assert(table.nrows() == 10) assert(table.is_consistent()) c1[2:7:2] = c11 c2[2:7:2] = c22 c3[2:7:2] = c33 assert(all(a == b for a, b in zip(table['col1'], c1))) assert(all(a == b for a, b in zip(table['col2'], c2))) assert(all(a == b for a, b in zip(table['col3'], c3))) print 'OK' # Try setting backward slicing table[7:2:-2] = new_table assert(table.ncols() == 3) assert(table.nrows() == 10) assert(table.is_consistent()) c1[7:2:-2] = c11 c2[7:2:-2] = c22 c3[7:2:-2] = c33 assert(all(a == b for a, b in zip(table['col1'], c1))) assert(all(a == b for a, b in zip(table['col2'], c2))) assert(all(a == b for a, b in zip(table['col3'], c3))) print 'OK'
def run(self, experiments, reflections):
    """Post-refine each experiment against the reference model.

    Only runs when post-refinement is enabled and the scaling algorithm is
    "mark0"; otherwise the inputs are returned unchanged.  Returns a
    (new_experiments, new_reflections) pair holding only the experiments
    that post-refined successfully, with their reflection columns rebuilt
    from the refined observations.  Rejection counts are reduced across
    MPI ranks and logged on rank 0.
    """
    self.logger.log_step_time("POSTREFINEMENT")

    if (not self.params.postrefinement.enable) or (self.params.scaling.algorithm != "mark0"):  # mark1 implies no scaling/post-refinement
        self.logger.log("No post-refinement was done")
        if self.mpi_helper.rank == 0:
            self.logger.main_log("No post-refinement was done")
        return experiments, reflections

    target_symm = symmetry(unit_cell=self.params.scaling.unit_cell, space_group_info=self.params.scaling.space_group)
    i_model = self.params.scaling.i_model
    miller_set = self.params.scaling.miller_set

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model.  The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    assert len(i_model.indices()) == len(miller_set.indices())
    assert (i_model.indices() == miller_set.indices()).count(False) == 0

    new_experiments = ExperimentList()
    new_reflections = flex.reflection_table()

    experiments_rejected_by_reason = {}  # reason:how_many_rejected

    for experiment in experiments:
        # Reflections belonging to this experiment only
        exp_reflections = reflections.select(reflections['exp_id'] == experiment.identifier)

        # Build a miller array for the experiment reflections with original miller indexes
        exp_miller_indices_original = miller.set(target_symm, exp_reflections['miller_index'], not self.params.merging.merge_anomalous)
        observations_original_index = miller.array(exp_miller_indices_original, exp_reflections['intensity.sum.value'], flex.double(flex.sqrt(exp_reflections['intensity.sum.variance'])))

        assert exp_reflections.size() == exp_miller_indices_original.size()
        assert observations_original_index.size() == exp_miller_indices_original.size()

        # Build a miller array for the experiment reflections with asu miller indexes
        exp_miller_indices_asu = miller.set(target_symm, exp_reflections['miller_index_asymmetric'], True)
        observations = miller.array(exp_miller_indices_asu, exp_reflections['intensity.sum.value'], flex.double(flex.sqrt(exp_reflections['intensity.sum.variance'])))

        matches = miller.match_multi_indices(miller_indices_unique=miller_set.indices(), miller_indices=observations.indices())

        pair1 = flex.int([pair[1] for pair in matches.pairs()])  # refers to the observations
        pair0 = flex.int([pair[0] for pair in matches.pairs()])  # refers to the model
        # NOTE(review): pair0 appears unused below (I_reference/I_invalid
        # index i_model via pair[0] directly) -- confirm before removing.

        assert exp_reflections.size() == exp_miller_indices_original.size()
        assert observations_original_index.size() == exp_miller_indices_original.size()

        # narrow things down to the set that matches, only
        observations_pair1_selected = observations.customized_copy(indices=flex.miller_index([observations.indices()[p] for p in pair1]), data=flex.double([observations.data()[p] for p in pair1]), sigmas=flex.double([observations.sigmas()[p] for p in pair1]))
        observations_original_index_pair1_selected = observations_original_index.customized_copy(indices=flex.miller_index([observations_original_index.indices()[p] for p in pair1]), data=flex.double([observations_original_index.data()[p] for p in pair1]), sigmas=flex.double([observations_original_index.sigmas()[p] for p in pair1]))

        I_observed = observations_pair1_selected.data()
        MILLER = observations_original_index_pair1_selected.indices()

        # Orientation from the experiment; sanity-check the two A* routes agree
        ORI = crystal_orientation(experiment.crystal.get_A(), basis_type.reciprocal)
        Astar = matrix.sqr(ORI.reciprocal_matrix())
        Astar_from_experiment = matrix.sqr(experiment.crystal.get_A())
        assert Astar == Astar_from_experiment

        WAVE = experiment.beam.get_wavelength()
        BEAM = matrix.col((0.0, 0.0, -1. / WAVE))
        BFACTOR = 0.
        MOSAICITY_DEG = experiment.crystal.get_half_mosaicity_deg()
        DOMAIN_SIZE_A = experiment.crystal.get_domain_size_ang()

        # calculation of correlation here
        I_reference = flex.double([i_model.data()[pair[0]] for pair in matches.pairs()])
        I_invalid = flex.bool([i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])

        use_weights = False  # New facility for getting variance-weighted correlation

        if use_weights:
            # variance weighting
            I_weight = flex.double([1. / (observations_pair1_selected.sigmas()[pair[1]]) ** 2 for pair in matches.pairs()])
        else:
            I_weight = flex.double(len(observations_pair1_selected.sigmas()), 1.)

        # Zero out the weights of reflections the reference marks invalid
        I_weight.set_selected(I_invalid, 0.)

        """Explanation of 'include_negatives' semantics as originally implemented in cxi.merge postrefinement:
        include_negatives = True
        + and - reflections both used for Rh distribution for initial estimate of RS parameter
        + and - reflections both used for calc/obs correlation slope for initial estimate of G parameter
        + and - reflections both passed to the refinery and used in the target function (makes sense if you look at it from a certain point of view)
        include_negatives = False
        + and - reflections both used for Rh distribution for initial estimate of RS parameter
        + reflections only used for calc/obs correlation slope for initial estimate of G parameter
        + and - reflections both passed to the refinery and used in the target function (makes sense if you look at it from a certain point of view)
        """
        # RB: By design, for MPI-Merge "include negatives" is implicitly True
        SWC = simple_weighted_correlation(I_weight, I_reference, I_observed)
        if self.params.output.log_level == 0:
            self.logger.log("Old correlation is: %f" % SWC.corr)

        if self.params.postrefinement.algorithm == "rs":
            # Initial RS estimate: RMS distance of each observation from the
            # Ewald sphere in reciprocal space
            Rhall = flex.double()
            for mill in MILLER:
                H = matrix.col(mill)
                Xhkl = Astar * H
                Rh = (Xhkl + BEAM).length() - (1. / WAVE)
                Rhall.append(Rh)
            Rs = math.sqrt(flex.mean(Rhall * Rhall))

            RS = 1. / 10000.  # reciprocal effective domain size of 1 micron
            RS = Rs  # try this empirically determined approximate, monochrome, a-mosaic value

            current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])
            parameterization_class = rs_parameterization
            refinery = rs_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM, WAVE=WAVE, ICALCVEC=I_reference, IOBSVEC=I_observed)
        elif self.params.postrefinement.algorithm == "eta_deff":
            eta_init = 2. * MOSAICITY_DEG * math.pi / 180.
            D_eff_init = 2. * DOMAIN_SIZE_A
            current = flex.double([SWC.slope, BFACTOR, eta_init, 0., 0., D_eff_init])
            parameterization_class = eta_deff_parameterization
            refinery = eta_deff_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM, WAVE=WAVE, ICALCVEC=I_reference, IOBSVEC=I_observed)
        # NOTE(review): if the algorithm is neither "rs" nor "eta_deff",
        # 'refinery'/'parameterization_class'/'current' are unbound below
        # (NameError) -- presumably validated upstream; confirm.

        func = refinery.fvec_callable(parameterization_class(current))
        functional = flex.sum(func * func)

        if self.params.output.log_level == 0:
            self.logger.log("functional: %f" % functional)

        # Stash refinement state on self for run_plain()/result_for_cxi_merge()
        self.current = current
        self.parameterization_class = parameterization_class
        self.refinery = refinery
        self.observations_pair1_selected = observations_pair1_selected
        self.observations_original_index_pair1_selected = observations_original_index_pair1_selected

        error_detected = False

        try:
            self.run_plain()
            result_observations_original_index, result_observations, result_matches = self.result_for_cxi_merge()
            assert result_observations_original_index.size() == result_observations.size()
            assert result_matches.pairs().size() == result_observations_original_index.size()
        except (AssertionError, ValueError, RuntimeError) as e:
            # Reject this experiment and tally the reason for later reporting
            error_detected = True
            reason = repr(e)
            if not reason:
                reason = "Unknown error"
            if not reason in experiments_rejected_by_reason:
                experiments_rejected_by_reason[reason] = 1
            else:
                experiments_rejected_by_reason[reason] += 1

        if not error_detected:
            # Keep the experiment and rebuild its reflection columns from the
            # post-refined observations
            new_experiments.append(experiment)
            new_exp_reflections = flex.reflection_table()
            new_exp_reflections['miller_index_asymmetric'] = flex.miller_index(result_observations.indices())
            new_exp_reflections['intensity.sum.value'] = flex.double(result_observations.data())
            new_exp_reflections['intensity.sum.variance'] = flex.double(flex.pow(result_observations.sigmas(), 2))
            new_exp_reflections['exp_id'] = flex.std_string(len(new_exp_reflections), experiment.identifier)
            new_reflections.extend(new_exp_reflections)
        '''
        # debugging
        elif reason.startswith("ValueError"):
          self.logger.log("Rejected b/c of value error exp id: %s; unit cell: %s"%(exp_id, str(experiment.crystal.get_unit_cell())) )
        '''

    # report rejected experiments, reflections
    experiments_rejected_by_postrefinement = len(experiments) - len(new_experiments)
    reflections_rejected_by_postrefinement = reflections.size() - new_reflections.size()

    self.logger.log("Experiments rejected by post-refinement: %d" % experiments_rejected_by_postrefinement)
    self.logger.log("Reflections rejected by post-refinement: %d" % reflections_rejected_by_postrefinement)

    all_reasons = []
    for reason, count in six.iteritems(experiments_rejected_by_reason):
        self.logger.log("Experiments rejected due to %s: %d" % (reason, count))
        all_reasons.append(reason)

    comm = self.mpi_helper.comm
    MPI = self.mpi_helper.MPI

    # Collect all rejection reasons from all ranks. Use allreduce to let each rank have all reasons.
    all_reasons = comm.allreduce(all_reasons, MPI.SUM)
    all_reasons = set(all_reasons)

    # Now that each rank has all reasons from all ranks, we can treat the reasons in a uniform way.
    total_experiments_rejected_by_reason = {}
    for reason in all_reasons:
        rejected_experiment_count = 0
        if reason in experiments_rejected_by_reason:
            rejected_experiment_count = experiments_rejected_by_reason[reason]
        total_experiments_rejected_by_reason[reason] = comm.reduce(rejected_experiment_count, MPI.SUM, 0)

    total_accepted_experiment_count = comm.reduce(len(new_experiments), MPI.SUM, 0)

    # how many reflections have we rejected due to post-refinement?
    rejected_reflections = len(reflections) - len(new_reflections)
    total_rejected_reflections = self.mpi_helper.sum(rejected_reflections)

    if self.mpi_helper.rank == 0:
        for reason, count in six.iteritems(total_experiments_rejected_by_reason):
            self.logger.main_log("Total experiments rejected due to %s: %d" % (reason, count))
        self.logger.main_log("Total experiments accepted: %d" % total_accepted_experiment_count)
        self.logger.main_log("Total reflections rejected due to post-refinement: %d" % total_rejected_reflections)

    self.logger.log_step_time("POSTREFINEMENT", True)

    return new_experiments, new_reflections
def tst_resizing(self): from dials.array_family import flex # Create a table with 2 empty columns table = flex.reflection_table() assert(table.empty()) table['col1'] = flex.int() table['col2'] = flex.double() assert(table.nrows() == 0) assert(table.ncols() == 2) assert(not table.empty()) assert('col1' in table) assert('col2' in table) assert('col3' not in table) print 'OK' # Create a table with 2 columns and 10 rows table = flex.reflection_table() table['col1'] = flex.int(10) table['col2'] = flex.double(10) assert(table.nrows() == 10) assert(table.ncols() == 2) print 'OK' # Add an extra column with the wrong size (throw) try: table['col3'] = flex.std_string(20) assert(False) except Exception: pass assert(table.nrows() == 10) assert(table.ncols() == 2) assert(table.is_consistent()) assert(len(table['col1']) == 10) assert(len(table['col2']) == 10) assert len(table) == table.size() print 'OK' # Resize the table (should resize all columns) table.resize(50) assert(table.nrows() == 50) assert(table.ncols() == 2) assert(table.is_consistent()) assert(len(table['col1']) == 50) assert(len(table['col2']) == 50) print 'OK' # Make the table inconsistent table['col1'].resize(40) assert(not table.is_consistent()) assert_exception(lambda: table.nrows()) assert_exception(lambda: table.ncols()) print 'OK' # Clear the table table.clear() assert(table.is_consistent()) assert(table.empty()) assert(table.nrows() == 0) assert(table.ncols() == 0) print 'OK'
def run(args):
    """Process each sweep in args in parallel (via run_once), then write
    summary tables (results.txt, results_indexed.txt) and histogram /
    unit-cell-parameter PDFs with matplotlib.  Python 2 print syntax is
    used throughout."""
    # Per-sweep accumulators
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()
    sweep_dir_cryst = flex.std_string()

    orig_dir = os.path.abspath(os.curdir)  # NOTE(review): unused in this function

    # Per-crystal accumulators
    rmsds = flex.vec3_double()
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()  # NOTE(review): duplicate of the assignment above

    # Fan the sweeps out over all available cores
    nproc = easy_mp.get_processes(libtbx.Auto)
    #nproc = 1
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    # Accumulate per-sweep statistics; a None result means the sweep failed
    for result in results:
        if result is None:
            continue
        sweep_directories.append(result.sweep_dir)
        templates.append(result.template)
        n_strong_spots.append(result.n_strong_spots)
        n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
        n_unindexed_spots.append(result.n_unindexed_spots)
        n_indexed_lattices.append(result.n_indexed_lattices)
        n_integrated_lattices.append(result.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(result.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(result.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(result.d_strong_spots_99th_percentile)
        cell_params.extend(result.cell_params)
        n_indexed.extend(result.n_indexed)
        d_min_indexed.extend(result.d_min_indexed)
        rmsds.extend(result.rmsds)
        sweep_dir_cryst.extend(result.sweep_dir_cryst)

    # Per-sweep summary table
    table_data = [('sweep_dir', 'template', '#strong_spots', '#unindexed_spots', '#lattices',
                   'd_spacing_50th_percentile',
                   'd_spacing_95th_percentile',
                   'd_spacing_99th_percentile',)]
    for i in range(len(sweep_directories)):
        table_data.append((sweep_directories[i],
                           templates[i],
                           str(n_strong_spots[i]),
                           str(n_unindexed_spots[i]),
                           str(n_indexed_lattices[i]),
                           str(d_strong_spots_50th_percentile[i]),
                           str(d_strong_spots_95th_percentile[i]),
                           str(d_strong_spots_99th_percentile[i]),
                           ))

    with open('results.txt', 'wb') as f:
        print >> f, table_utils.format(
            table_data, has_header=True, justify='right')

    # Per-crystal indexing summary table
    table_data = [('sweep_dir', 'cell_a', 'cell_b', 'cell_c', 'alpha', 'beta', 'gamma',
                   '#indexed_reflections', 'd_min_indexed',
                   'rmsd_x', 'rmsd_y', 'rmsd_phi')]
    for i in range(len(cell_params)):
        table_data.append((sweep_dir_cryst[i],
                           str(cell_params[i][0]),
                           str(cell_params[i][1]),
                           str(cell_params[i][2]),
                           str(cell_params[i][3]),
                           str(cell_params[i][4]),
                           str(cell_params[i][5]),
                           str(n_indexed[i]),
                           str(d_min_indexed[i]),
                           str(rmsds[i][0]),
                           str(rmsds[i][1]),
                           str(rmsds[i][2]),
                           ))

    with open('results_indexed.txt', 'wb') as f:
        print >> f, table_utils.format(
            table_data, has_header=True, justify='right')

    # Split the sym_mat3 cell parameters into per-parameter arrays
    cell_a = flex.double([params[0] for params in cell_params])
    cell_b = flex.double([params[1] for params in cell_params])
    cell_c = flex.double([params[2] for params in cell_params])
    cell_alpha = flex.double([params[3] for params in cell_params])
    cell_beta = flex.double([params[4] for params in cell_params])
    cell_gamma = flex.double([params[5] for params in cell_params])

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')
    red, blue = '#B2182B', '#2166AC'  # NOTE(review): 'red' is unused below

    # Histogram of strong spot counts (to 4 Angstrom) per sweep
    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75 * hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("spot_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()
    #pyplot.show()

    # Histogram of the number of indexed lattices per sweep
    hist = flex.histogram(n_indexed_lattices.as_double(),
                          n_slots=flex.max(n_indexed_lattices))
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(range(int(hist.data_max())), hist.slots(),
           width=0.75 * hist.slot_width(), align='center',
           color=blue, edgecolor=blue)
    ax.set_xlim(-0.5, hist.data_max() - 0.5)
    ax.set_xticks(range(0, int(hist.data_max())))
    ax.set_xlabel('Number of indexed lattices')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("n_indexed_lattices_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()
    #pyplot.show()

    # Histogram of the number of integrated lattices, only if any integrated
    if flex.max(n_integrated_lattices) > 0:
        hist = flex.histogram(n_integrated_lattices.as_double(),
                              n_slots=flex.max(n_integrated_lattices))
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(range(int(hist.data_max())), hist.slots(),
               width=0.75 * hist.slot_width(), align='center',
               color=blue, edgecolor=blue)
        ax.set_xlim(-0.5, hist.data_max() - 0.5)
        ax.set_xticks(range(0, int(hist.data_max())))
        ax.set_xlabel('Number of integrated lattices')
        ax.set_ylabel('Frequency')
        pdf = PdfPages("n_integrated_lattices_histogram.pdf")
        pdf.savefig(fig)
        pdf.close()
        #pyplot.show()

    # 2x3 grid of unit cell parameter histograms
    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    for i, cell_param in enumerate(
            (cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma)):
        ax = axes.flat[i]
        flex.min_max_mean_double(cell_param).show()
        print flex.median(cell_param)
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(hist.slot_centers(), hist.slots(), width=0.75 * hist.slot_width(),
               color=blue, edgecolor=blue)
        ax.set_xlabel('Cell parameter')
        ax.set_ylabel('Frequency')
    pyplot.tight_layout()
    pdf = PdfPages("cell_parameters.pdf")
    pdf.savefig(fig)
    pdf.close()
def export_mtz(observed_hkls, experiment, filename):
    """Write the observed reflections to an MTZ file, one batch per
    observation round.

    :param observed_hkls: dict mapping Miller index -> number of times the
        reflection was observed (Python 2 dict; itervalues/iteritems used)
    :param experiment: a dxtbx-style experiment; NOTE its crystal is
        re-based to the reference setting IN PLACE as a side effect
    :param filename: path of the MTZ file to write
    :return: the iotbx mtz object that was written
    """
    if experiment.goniometer:
        axis = experiment.goniometer.get_rotation_axis()
    else:
        axis = 0.0, 0.0, 0.0
    s0 = experiment.beam.get_s0()
    wavelength = experiment.beam.get_wavelength()

    panel = experiment.detector[0]

    # Move the crystal to the reference setting of its space group (in place)
    cb_op_to_ref = experiment.crystal.get_space_group().info(
    ).change_of_basis_op_to_reference_setting()
    experiment.crystal = experiment.crystal.change_basis(cb_op_to_ref)

    from iotbx import mtz
    from scitbx.array_family import flex
    import itertools

    m = mtz.object()
    m.set_title('from dials.scratch.mg.strategy_i19')
    m.set_space_group_info(experiment.crystal.get_space_group().info())

    nrefcount = sum(observed_hkls.itervalues())  # total number of observations
    nref = max(observed_hkls.itervalues())       # highest multiplicity = number of batches

    # One batch header per observation round
    for batch in range(1, nref + 1):
        o = m.add_batch().set_num(batch).set_nbsetid(1).set_ncryst(1)
        o.set_time1(0.0).set_time2(0.0).set_title('Batch %d' % batch)
        o.set_ndet(1).set_theta(flex.float((0.0, 0.0))).set_lbmflg(0)
        o.set_alambd(wavelength).set_delamb(0.0).set_delcor(0.0)
        o.set_divhd(0.0).set_divvd(0.0)
        o.set_so(flex.float(s0)).set_source(flex.float((0, 0, -1)))
        o.set_bbfac(0.0).set_bscale(1.0)
        o.set_sdbfac(0.0).set_sdbscale(0.0).set_nbscal(0)
        _unit_cell = experiment.crystal.get_unit_cell()
        _U = experiment.crystal.get_U()
        o.set_cell(flex.float(_unit_cell.parameters()))
        o.set_lbcell(flex.int((-1, -1, -1, -1, -1, -1)))
        o.set_umat(flex.float(_U.transpose().elems))
        mosaic = experiment.crystal.get_mosaicity()
        o.set_crydat(flex.float([mosaic, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]))
        o.set_lcrflg(0)
        o.set_datum(flex.float((0.0, 0.0, 0.0)))
        # detector size, distance
        o.set_detlm(flex.float([0.0, panel.get_image_size()[0],
                                0.0, panel.get_image_size()[1],
                                0, 0, 0, 0]))
        o.set_dx(flex.float([panel.get_directed_distance(), 0.0]))
        # goniometer axes and names, and scan axis number, and number of axes, missets
        o.set_e1(flex.float(axis))
        o.set_e2(flex.float((0.0, 0.0, 0.0)))
        o.set_e3(flex.float((0.0, 0.0, 0.0)))
        o.set_gonlab(flex.std_string(('AXIS', '', '')))
        o.set_jsaxs(1)
        o.set_ngonax(1)
        o.set_phixyz(flex.float((0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
        phi_start, phi_range = 0.0, 0.0
        o.set_phistt(phi_start)
        o.set_phirange(phi_range)
        o.set_phiend(phi_start + phi_range)
        o.set_scanax(flex.float(axis))
        # number of misorientation angles
        o.set_misflg(0)
        # crystal axis closest to rotation axis (why do I want this?)
        o.set_jumpax(0)
        # type of data - 1; 2D, 2; 3D, 3; Laue
        o.set_ldtype(2)

    # now create the actual data structures - first keep a track of the columns
    # H K L M/ISYM BATCH I SIGI IPR SIGIPR FRACTIONCALC XDET YDET ROT WIDTH
    # LP MPART FLAG BGPKRATIOS
    from cctbx.array_family import flex as cflex # implicit import

    # now go for it and make an MTZ file...
    # BUGFIX: this previously read 'unit_cell.parameters()', but no local
    # 'unit_cell' exists (NameError at runtime); use the crystal's cell.
    x = m.add_crystal('XTAL', 'DIALS',
                      experiment.crystal.get_unit_cell().parameters())
    d = x.add_dataset('FROMDIALS', wavelength)

    # now add column information...
    type_table = {'IPR': 'J', 'BGPKRATIOS': 'R', 'WIDTH': 'R', 'I': 'J',
                  'H': 'H', 'K': 'H', 'MPART': 'I', 'L': 'H', 'BATCH': 'B',
                  'M_ISYM': 'Y', 'SIGI': 'Q', 'FLAG': 'I', 'XDET': 'R',
                  'LP': 'R', 'YDET': 'R', 'SIGIPR': 'Q',
                  'FRACTIONCALC': 'R', 'ROT': 'R'}

    m.adjust_column_array_sizes(nrefcount)
    m.set_n_reflections(nrefcount)

    # assign H, K, L, M_ISYM space
    for column in 'H', 'K', 'L', 'M_ISYM':
        d.add_column(column, type_table[column]).set_values(
            flex.float(nrefcount, 0.0))

    # BATCH column: batch number b for the b'th observation of each hkl.
    # Materialised as a list (was a generator) so flex.float gets a sequence.
    batchnums = [_ for (hkl, n) in observed_hkls.iteritems()
                 for _ in range(1, n + 1)]
    d.add_column('BATCH', type_table['BATCH']).set_values(flex.float(batchnums))
    d.add_column('FRACTIONCALC', type_table['FRACTIONCALC']).set_values(
        flex.float(nrefcount, 3.0))

    # Each hkl repeated once per observation, mapped to the reference setting
    m.replace_original_index_miller_indices(cb_op_to_ref.apply(
        cflex.miller_index([_ for (hkl, n) in observed_hkls.iteritems()
                            for _ in itertools.repeat(hkl, n)])
    ))

    m.write(filename)

    return m
def run(self, all_experiments, all_reflections):
    """ Load all the data using MPI.

    Rank 0 builds the list of json/pickle file pairs and splits it across
    ranks; each rank then loads its share, stamps every reflection with a
    globally unique experiment identifier in 'exp_id', prunes the reflection
    table down to the columns needed for merging, and returns the
    accumulated (all_experiments, all_reflections).  Either both inputs are
    None (fresh accumulators are created) or both are pre-existing
    accumulators to extend.
    """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    test = [all_experiments is None, all_reflections is None].count(True)
    assert test in [0, 2]
    if test == 2:
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0
    else:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    self.logger.log(
        "Initial number of experiments: %d; Initial number of reflections: %d"
        % (starting_expts_count, starting_refls_count))

    # Generate and send a list of file paths to each worker
    if self.mpi_helper.rank == 0:
        file_list = self.get_list()
        self.logger.log(
            "Built an input list of %d json/pickle file pairs"
            % (len(file_list)))
        self.params.input.path = None  # Rank 0 has already parsed the input parameters
        # Balance the file load over the available ranks
        per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
            calculate_file_load(available_rank_count=self.mpi_helper.size)
        self.logger.log(
            'Transmitting a list of %d lists of json/pickle file pairs'
            % (len(per_rank_file_list)))
        transmitted = per_rank_file_list
    else:
        transmitted = None

    self.logger.log_step_time("BROADCAST_FILE_LIST")
    transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
    # Ranks beyond the number of sub-lists get nothing to load
    new_file_list = transmitted[
        self.mpi_helper.rank] if self.mpi_helper.rank < len(transmitted) else None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if new_file_list is not None:
        self.logger.log("Received a list of %d json/pickle file pairs"
                        % len(new_file_list))
        for experiments_filename, reflections_filename in new_file_list:
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename, check_format=False)
            reflections = flex.reflection_table.from_file(reflections_filename)
            for experiment_id, experiment in enumerate(experiments):
                # Ensure every experiment carries a non-empty identifier
                if experiment.identifier is None or len(experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)
                all_experiments.append(experiment)
                #experiment.identifier = "%d"%(len(all_experiments) - 1)
                # select reflections of the current experiment
                refls = reflections.select(reflections['id'] == experiment_id)
                # Reflection experiment 'id' is supposed to be unique within this rank;
                # 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
                #refls['id'] = flex.size_t(len(refls), len(all_experiments)-1)
                refls['exp_id'] = flex.std_string(len(refls), experiment.identifier)
                all_reflections.extend(refls)
    else:
        self.logger.log("Received a list of 0 json/pickle file pairs")
    self.logger.log_step_time("LOAD", True)

    self.logger.log('Read %d experiments consisting of %d reflections'
                    % (len(all_experiments) - starting_expts_count,
                       len(all_reflections) - starting_refls_count))
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    # Drop every column not needed downstream to save memory
    from xfel.merging.application.reflection_table_utils import reflection_table_utils
    all_reflections = reflection_table_utils.prune_reflection_table_keys(
        reflections=all_reflections,
        keys_to_keep=['intensity.sum.value', 'intensity.sum.variance',
                      'miller_index', 'miller_index_asymmetric', 'exp_id', 's1'])
    self.logger.log("Pruned reflection table")
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)

    return all_experiments, all_reflections
def run():
    """Command-line entry point for xia2.multi_crystal_scale_and_merge.

    Parses the command line (phil parameters plus experiment/reflection
    files), validates and merges the per-sweep reflection tables into a
    single table with consistent experiment identifiers, then hands
    everything to ScaleAndMerge.MultiCrystalScale.
    """
    # The script usage
    usage = "usage: xia2.multi_crystal_scale_and_merge [options] [param.phil] " \
            "experiments1.json experiments2.json reflections1.pickle " \
            "reflections2.pickle..."

    # Create the parser
    parser = OptionParser(
        usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_experiments=True,
        check_format=False,
        epilog=help_message)

    # Parse the command line
    params, options = parser.parse_args(show_diff_phil=True)

    # Configure the logging for both the xia2 and dials logger hierarchies
    for name in ('xia2', 'dials'):
        log.config(
            info=params.output.log, debug=params.output.debug_log, name=name)
    from dials.util.version import dials_version
    logger.info(dials_version())

    # Try to load the models and data; bail out early (with usage help) if
    # either input list is empty
    if len(params.input.experiments) == 0:
        logger.info("No Experiments found in the input")
        parser.print_help()
        return
    if len(params.input.reflections) == 0:
        logger.info("No reflection data found in the input")
        parser.print_help()
        return
    # Reflection and experiment files must pair up one-to-one
    try:
        assert len(params.input.reflections) == len(params.input.experiments)
    except AssertionError:
        raise Sorry("The number of input reflections files does not match the "
                    "number of input experiments")

    # Seed both the flex and stdlib RNGs for reproducible runs
    if params.seed is not None:
        import random
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    # NOTE(review): these two mappings are built but not used below —
    # presumably kept for debugging or a later code path; confirm.
    expt_filenames = OrderedDict(
        (e.filename, e.data) for e in params.input.experiments)
    refl_filenames = OrderedDict(
        (r.filename, r.data) for r in params.input.reflections)

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    # Either one combined reflection table, or one table per experiment
    reflections_all = flex.reflection_table()
    assert len(reflections) == 1 or len(reflections) == len(experiments)
    if len(reflections) > 1:
        # One table per experiment: assign sequential string identifiers and
        # integer ids, then concatenate into a single table
        for i, (expt, refl) in enumerate(zip(experiments, reflections)):
            expt.identifier = '%i' % i
            refl['identifier'] = flex.std_string(refl.size(), expt.identifier)
            refl['id'] = flex.int(refl.size(), i)
            reflections_all.extend(refl)
            reflections_all.experiment_identifiers()[i] = expt.identifier
    else:
        # A single combined table must already carry per-experiment identifiers
        reflections_all = reflections[0]
        assert 'identifier' in reflections_all
        assert len(set(reflections_all['identifier'])) == len(experiments)
    assert reflections_all.are_experiment_identifiers_consistent(experiments)

    # Allow comma-separated identifier lists on the command line
    if params.identifiers is not None:
        identifiers = []
        for identifier in params.identifiers:
            identifiers.extend(identifier.split(','))
        params.identifiers = identifiers
    # NOTE(review): the return value is unused here — MultiCrystalScale
    # presumably does its work (and output) in its constructor; confirm.
    scaled = ScaleAndMerge.MultiCrystalScale(experiments, reflections_all, params)
def _add_batch(
    mtz,
    experiment,
    wavelength,
    dataset_id,
    batch_number,
    image_number,
    force_static_model,
):
    """Add a single image's metadata to an mtz file.

    Configures one MTZ batch header (orientation, cell, goniometer, scan
    and detector records) for the given image of *experiment*.

    Args:
        mtz: the MTZ object to add a batch to (must support add_batch()).
        experiment: a DIALS experiment providing detector, beam, crystal,
            goniometer and scan models.
        wavelength: wavelength to record in the batch header.
        dataset_id: MTZ dataset (set) id for this batch.
        batch_number: batch number to assign; must be positive.
        image_number: image number within the scan, used for oscillation
            and scan-varying model lookup.
        force_static_model: if True, ignore scan-varying crystal models
            and use the static unit cell / orientation.

    Returns the batch object.
    """
    assert batch_number > 0

    # Recalculate useful numbers and references here
    # We ignore panels beyond the first one, at the moment
    panel = experiment.detector[0]

    # Rotation axis, or a null axis for still experiments
    if experiment.goniometer:
        axis = matrix.col(experiment.goniometer.get_rotation_axis())
    else:
        axis = 0.0, 0.0, 0.0

    U = matrix.sqr(experiment.crystal.get_U())
    if experiment.goniometer is not None:
        F = matrix.sqr(experiment.goniometer.get_fixed_rotation())
    else:
        F = matrix.sqr((1, 0, 0, 0, 1, 0, 0, 0, 1))

    # Create the batch object and start configuring it
    o = mtz.add_batch().set_num(batch_number).set_nbsetid(
        dataset_id).set_ncryst(1)
    o.set_time1(0.0).set_time2(0.0).set_title("Batch {}".format(batch_number))
    o.set_ndet(1).set_theta(flex.float((0.0, 0.0))).set_lbmflg(0)
    o.set_alambd(wavelength).set_delamb(0.0).set_delcor(0.0)
    o.set_divhd(0.0).set_divvd(0.0)

    # FIXME hard-coded assumption on indealized beam vector below... this may be
    # broken when we come to process data from a non-imgCIF frame
    s0n = matrix.col(experiment.beam.get_s0()).normalize().elems
    o.set_so(flex.float(s0n)).set_source(flex.float((0, 0, -1)))

    # these are probably 0, 1 respectively, also flags for how many are set, sd
    o.set_bbfac(0.0).set_bscale(1.0)
    o.set_sdbfac(0.0).set_sdbscale(0.0).set_nbscal(0)

    # unit cell (this is fine) and the what-was-refined-flags FIXME hardcoded
    # take time-varying parameters from the *end of the frame* unlikely to
    # be much different at the end - however only exist if scan-varying
    # refinement was used
    if not force_static_model and experiment.crystal.num_scan_points > 0:
        # Get the index of the image in the sequence e.g. first => 0, second => 1
        image_index = image_number - experiment.scan.get_image_range()[0]
        _unit_cell = experiment.crystal.get_unit_cell_at_scan_point(
            image_index)
        _U = matrix.sqr(experiment.crystal.get_U_at_scan_point(image_index))
    else:
        _unit_cell = experiment.crystal.get_unit_cell()
        _U = U

    # apply the fixed rotation to this to unify matrix definitions - F * U
    # was what was used in the actual prediction: U appears to be stored
    # as the transpose?! At least is for Mosflm...
    #
    # FIXME Do we need to apply the setting rotation here somehow? i.e. we have
    # the U.B. matrix assuming that the axis is equal to S * axis_datum but
    # here we are just giving the effective axis so at scan angle 0 this will
    # not be correct... FIXME 2 not even sure we can express the stack of
    # matrices S * R * F * U * B in MTZ format?... see [=A=] below
    _U = matrix.sqr(dials_u_to_mosflm(F * _U, _unit_cell))

    # FIXME need to get what was refined and what was constrained from the
    # crystal model - see https://github.com/dials/dials/issues/355
    o.set_cell(flex.float(_unit_cell.parameters()))
    # -1 refinement flags: meaning per MTZ convention — TODO confirm
    o.set_lbcell(flex.int((-1, -1, -1, -1, -1, -1)))
    o.set_umat(flex.float(_U.transpose().elems))

    # get the mosaic spread though today it may not actually be set - should
    # this be in the BATCH headers?
    try:
        mosaic = experiment.crystal.get_mosaicity()
    except AttributeError:
        mosaic = 0
    o.set_crydat(
        flex.float(
            [mosaic, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]))

    o.set_lcrflg(0)
    o.set_datum(flex.float((0.0, 0.0, 0.0)))

    # detector size, distance
    o.set_detlm(
        flex.float([
            0.0,
            panel.get_image_size()[0], 0.0,
            panel.get_image_size()[1], 0, 0, 0, 0
        ]))
    o.set_dx(flex.float([panel.get_directed_distance(), 0.0]))

    # goniometer axes and names, and scan axis number, and num axes, missets
    # [=A=] should we be using this to unroll the setting matrix etc?
    o.set_e1(flex.float(axis))
    o.set_e2(flex.float((0.0, 0.0, 0.0)))
    o.set_e3(flex.float((0.0, 0.0, 0.0)))
    o.set_gonlab(flex.std_string(("AXIS", "", "")))
    o.set_jsaxs(1)
    o.set_ngonax(1)
    o.set_phixyz(flex.float((0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))

    # scan ranges, axis; zero oscillation for still experiments
    if experiment.scan:
        phi_start, phi_range = experiment.scan.get_image_oscillation(
            image_number)
    else:
        phi_start, phi_range = 0.0, 0.0

    o.set_phistt(phi_start)
    o.set_phirange(phi_range)
    o.set_phiend(phi_start + phi_range)
    o.set_scanax(flex.float(axis))

    # number of misorientation angles
    o.set_misflg(0)

    # crystal axis closest to rotation axis (why do I want this?)
    o.set_jumpax(0)

    # type of data - 1; 2D, 2; 3D, 3; Laue
    o.set_ldtype(2)

    return o
def run(self, all_experiments, all_reflections):
    """ Load all the data using MPI.

    Rank 0 builds the list of experiment/reflection file pairs and a
    per-rank load plan; the plan is broadcast and each rank reads its
    share, remapping reflection 'id' values to the global experiment
    order on this rank and recording experiment identifiers.

    Args:
        all_experiments: ExperimentList to append to, or None (paired
            with all_reflections — both None or both not None).
        all_reflections: flex.reflection_table to extend, or None.

    Returns the (possibly newly created) all_experiments and the pruned
    all_reflections.
    """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    test = [all_experiments is None, all_reflections is None].count(True)
    assert test in [0, 2]
    if test == 2:
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0
    else:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    self.logger.log("Initial number of experiments: %d; Initial number of reflections: %d"%(starting_expts_count, starting_refls_count))

    # Generate and send a list of file paths to each worker
    if self.mpi_helper.rank == 0:
        file_list = self.get_list()
        self.logger.log("Built an input list of %d json/pickle file pairs"%(len(file_list)))
        self.params.input.path = None # Rank 0 has already parsed the input parameters
        # Split the file list into one sub-list per available rank
        per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
            calculate_file_load(available_rank_count = self.mpi_helper.size)
        self.logger.log('Transmitting a list of %d lists of json/pickle file pairs'%(len(per_rank_file_list)))
        transmitted = per_rank_file_list
    else:
        transmitted = None

    self.logger.log_step_time("BROADCAST_FILE_LIST")
    transmitted = self.mpi_helper.comm.bcast(transmitted, root = 0)
    # Ranks beyond the number of sub-lists get nothing to load
    new_file_list = transmitted[self.mpi_helper.rank] if self.mpi_helper.rank < len(transmitted) else None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if new_file_list is not None:
        self.logger.log("Received a list of %d json/pickle file pairs"%len(new_file_list))
        for experiments_filename, reflections_filename in new_file_list:
            self.logger.log("Reading %s %s"%(experiments_filename, reflections_filename))
            experiments = ExperimentListFactory.from_json_file(experiments_filename, check_format = False)
            reflections = flex.reflection_table.from_file(reflections_filename)
            self.logger.log("Data read, prepping")

            # Keep pristine copies of the summation intensities before any
            # downstream modification ('* 1' forces a copy)
            if 'intensity.sum.value' in reflections:
                reflections['intensity.sum.value.unmodified'] = reflections['intensity.sum.value'] * 1
            if 'intensity.sum.variance' in reflections:
                reflections['intensity.sum.variance.unmodified'] = reflections['intensity.sum.variance'] * 1

            # Build fresh 'id'/'exp_id' columns; -1 marks not-yet-assigned rows
            new_ids = flex.int(len(reflections), -1)
            new_identifiers = flex.std_string(len(reflections))
            # Discard any identifier map carried in from the input file
            eid = reflections.experiment_identifiers()
            for k in eid.keys():
                del eid[k]
            for experiment_id, experiment in enumerate(experiments):
                # select reflections of the current experiment
                refls_sel = reflections['id'] == experiment_id
                if refls_sel.count(True) == 0: continue

                if experiment.identifier is None or len(experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(experiment, experiments_filename, experiment_id)

                all_experiments.append(experiment)

                # Reflection experiment 'id' is unique within this rank; 'exp_id' (i.e. experiment identifier) is unique globally
                new_identifiers.set_selected(refls_sel, experiment.identifier)

                # Remap 'id' to the experiment's position in all_experiments
                new_id = len(all_experiments)-1
                eid[new_id] = experiment.identifier
                new_ids.set_selected(refls_sel, new_id)
            assert (new_ids < 0).count(True) == 0, "Not all reflections accounted for"
            reflections['id'] = new_ids
            reflections['exp_id'] = new_identifiers
            all_reflections.extend(reflections)
    else:
        self.logger.log("Received a list of 0 json/pickle file pairs")

    self.logger.log_step_time("LOAD", True)
    self.logger.log('Read %d experiments consisting of %d reflections'%(len(all_experiments)-starting_expts_count, len(all_reflections)-starting_refls_count))
    self.logger.log("Memory usage: %d MB"%get_memory_usage())

    # Drop all columns not needed for merging to save memory
    all_reflections = self.prune_reflection_table_keys(all_reflections)

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)
    return all_experiments, all_reflections
def tst_row_operations(self):
    """Test extend/append/insert and row get/set on a reflection table.

    Fix: the original used Python-2-only ``print 'OK'`` statements even
    though this codebase also contains Python-3-style modules; the prints
    are now parenthesized, which produces identical output on Python 2.
    """
    from dials.array_family import flex

    # The columns as lists
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table = flex.reflection_table()
    table['col1'] = flex.int(c1)
    table['col2'] = flex.double(c2)
    table['col3'] = flex.std_string(c3)

    # Extend the table with itself: every column doubles
    table.extend(table)
    c1 = c1 * 2
    c2 = c2 * 2
    c3 = c3 * 2
    assert table.nrows() == 20
    assert table.ncols() == 3
    assert table.is_consistent()
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')

    # Append a partial row: missing columns are default-filled (0 / '')
    row = {'col1': 10}
    c1 = c1 + [10]
    c2 = c2 + [0]
    c3 = c3 + ['']
    table.append(row)
    assert table.nrows() == 21
    assert table.ncols() == 3
    assert table.is_consistent()
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')

    row = {'col2': 11}
    c1 = c1 + [0]
    c2 = c2 + [11]
    c3 = c3 + ['']
    table.append(row)
    assert table.nrows() == 22
    assert table.ncols() == 3
    assert table.is_consistent()
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')

    # Append a complete row
    row = {'col1': 12, 'col2': 12, 'col3': 'l'}
    c1 = c1 + [12]
    c2 = c2 + [12]
    c3 = c3 + ['l']
    table.append(row)
    assert table.nrows() == 23
    assert table.ncols() == 3
    assert table.is_consistent()
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')

    # Try inserting some rows (partial row default-fills the rest)
    row = {'col1': -1}
    c1.insert(5, -1)
    c2.insert(5, 0)
    c3.insert(5, '')
    table.insert(5, row)
    assert table.nrows() == 24
    assert table.ncols() == 3
    assert table.is_consistent()
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')

    row = {'col1': -2, 'col2': -3, 'col3': 'abc'}
    c1.insert(2, -2)
    c2.insert(2, -3)
    c3.insert(2, 'abc')
    table.insert(2, row)
    assert table.nrows() == 25
    assert table.ncols() == 3
    assert table.is_consistent()
    assert all(a == b for a, b in zip(table['col1'], c1))
    assert all(a == b for a, b in zip(table['col2'], c2))
    assert all(a == b for a, b in zip(table['col3'], c3))
    print('OK')

    # Try iterating through table rows
    for i in range(table.nrows()):
        row = table[i]
        assert row['col1'] == c1[i]
        assert row['col2'] == c2[i]
        assert row['col3'] == c3[i]
    print('OK')

    # Trying setting some rows: unnamed columns keep their values
    row = {'col1': 100}
    table[2] = row
    assert table[2]['col1'] == 100
    assert table[2]['col2'] == c2[2]
    assert table[2]['col3'] == c3[2]
    row = {'col1': 1000, 'col2': 2000, 'col3': 'hello'}
    table[10] = row
    assert table[10]['col1'] == 1000
    assert table[10]['col2'] == 2000
    assert table[10]['col3'] == 'hello'
    print('OK')
def run(self, all_experiments, all_reflections):
    """ Load all the data using MPI.

    Variant of the loader that works around a broken flex selection by
    sorting the reflection table on 'id' and slicing contiguous ranges
    with numpy. Rank 0 builds and broadcasts a per-rank load plan; each
    rank reads its share, assigns sequential global-on-this-rank 'id'
    values and globally unique 'exp_id' identifiers.

    Args:
        all_experiments: ExperimentList to append to, or None (paired
            with all_reflections — both None or both not None).
        all_reflections: flex.reflection_table to extend, or None.

    Returns the (possibly newly created) all_experiments and the pruned
    all_reflections.
    """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    test = [all_experiments is None, all_reflections is None].count(True)
    assert test in [0, 2]
    if test == 2:
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0
    else:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    self.logger.log(
        "Initial number of experiments: %d; Initial number of reflections: %d"
        % (starting_expts_count, starting_refls_count))

    # Generate and send a list of file paths to each worker
    if self.mpi_helper.rank == 0:
        file_list = self.get_list()
        self.logger.log(
            "Built an input list of %d json/pickle file pairs" %
            (len(file_list)))
        self.params.input.path = None  # Rank 0 has already parsed the input parameters
        # Split the file list into one sub-list per available rank
        per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
            calculate_file_load(available_rank_count = self.mpi_helper.size)
        self.logger.log(
            'Transmitting a list of %d lists of json/pickle file pairs' %
            (len(per_rank_file_list)))
        transmitted = per_rank_file_list
    else:
        transmitted = None

    self.logger.log_step_time("BROADCAST_FILE_LIST")
    transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
    # Ranks beyond the number of sub-lists get nothing to load
    new_file_list = transmitted[
        self.mpi_helper.
        rank] if self.mpi_helper.rank < len(transmitted) else None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if new_file_list is not None:
        self.logger.log("Received a list of %d json/pickle file pairs" %
                        len(new_file_list))
        for experiments_filename, reflections_filename in new_file_list:
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename, check_format=False)
            reflections = flex.reflection_table.from_file(
                reflections_filename)
            # NOTE: had to use slicing below because it selection no longer works...
            # Sorting on 'id' makes each experiment's reflections contiguous,
            # so a [first:last+1] slice below selects exactly one experiment
            reflections.sort("id")
            unique_refl_ids = set(reflections['id'])
            assert len(unique_refl_ids) == len(
                experiments
            ), "refl table and experiment list should contain data on same experiment "  # TODO: decide if this is true
            assert min(
                reflections["id"]
            ) >= 0, "No more -1 in the id column, ideally it should be the numerical index of experiment, but beware that this is not enforced anywhere in the upstream code base"

            # Keep pristine copies of the summation intensities before any
            # downstream modification ('* 1' forces a copy)
            if 'intensity.sum.value' in reflections:
                reflections[
                    'intensity.sum.value.unmodified'] = reflections[
                        'intensity.sum.value'] * 1
            if 'intensity.sum.variance' in reflections:
                reflections[
                    'intensity.sum.variance.unmodified'] = reflections[
                        'intensity.sum.variance'] * 1

            for experiment_id, experiment in enumerate(experiments):
                if experiment.identifier is None or len(
                        experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)

                all_experiments.append(experiment)

                # select reflections of the current experiment
                # FIXME the selection was broke for me, it raised
                #    RuntimeError: boost::bad_get: failed value get using boost::get
                #refls = reflections.select(reflections['id'] == experiment_id)
                # NOTE: this is a hack due to the broken expereimnt_id selection above
                exp_id_pos = np.where(
                    reflections['id'] == experiment_id)[0]
                assert exp_id_pos.size, "no refls in this experiment"  # NOTE: maybe we can relax this assertion ?
                refls = reflections[exp_id_pos[0]:exp_id_pos[-1] + 1]

                #FIXME: how will this work if reading in multiple composite mode experiment jsons?
                # Reflection experiment 'id' is supposed to be unique within this rank; 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
                refls['exp_id'] = flex.std_string(len(refls),
                                                  experiment.identifier)

                # Next sequential 'id' across everything loaded so far on this rank
                new_id = 0
                if len(all_reflections) > 0:
                    new_id = max(all_reflections['id']) + 1

                # FIXME: it is hard to interperet that a function call returning a changeable property
                eid = refls.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]
                eid[new_id] = experiment.identifier
                refls['id'] = flex.int(len(refls), new_id)
                all_reflections.extend(refls)
    else:
        self.logger.log("Received a list of 0 json/pickle file pairs")

    self.logger.log_step_time("LOAD", True)
    self.logger.log('Read %d experiments consisting of %d reflections' %
                    (len(all_experiments) - starting_expts_count,
                     len(all_reflections) - starting_refls_count))
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    # Drop all columns not needed for merging to save memory
    from xfel.merging.application.reflection_table_utils import reflection_table_utils
    all_reflections = reflection_table_utils.prune_reflection_table_keys(
        reflections=all_reflections,
        keys_to_keep=[
            'intensity.sum.value', 'intensity.sum.variance', 'miller_index',
            'miller_index_asymmetric', 'exp_id', 's1',
            'intensity.sum.value.unmodified',
            'intensity.sum.variance.unmodified'
        ])
    self.logger.log("Pruned reflection table")
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)

    return all_experiments, all_reflections
def tst_set_selected(self):
    """Test set_selected with column names, size_t indices and bool flags.

    Fixes: the original built ``flags = flex.bool([...])`` for the final
    case but then called ``set_selected(index, ...)`` again, leaving the
    bool-selection path untested — the flags (rows 0,1,5,8,9) are now
    actually used, which selects the same rows so the expected values are
    unchanged. Python-2-only ``print`` statements are also parenthesized.
    """
    from dials.array_family import flex
    from copy import deepcopy

    # The columns as lists
    c1 = list(range(10))
    c2 = list(range(10))
    c3 = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k']

    # Create a table with some elements
    table1 = flex.reflection_table()
    table2 = flex.reflection_table()
    table1['col1'] = flex.int(c1)
    table2['col2'] = flex.double(c2)
    table2['col3'] = flex.std_string(c3)

    # Set selected columns (tuple of names)
    table1.set_selected(('col3', 'col2'), table2)
    assert table1.nrows() == 10
    assert table1.ncols() == 3
    assert all(a == b for a, b in zip(table1['col1'], c1))
    assert all(a == b for a, b in zip(table1['col2'], c2))
    assert all(a == b for a, b in zip(table1['col3'], c3))
    print('OK')

    # Set selected columns (flex.std_string of names)
    table1 = flex.reflection_table()
    table1['col1'] = flex.int(c1)
    table1.set_selected(flex.std_string(['col3', 'col2']), table2)
    assert table1.nrows() == 10
    assert table1.ncols() == 3
    assert all(a == b for a, b in zip(table1['col1'], c1))
    assert all(a == b for a, b in zip(table1['col2'], c2))
    assert all(a == b for a, b in zip(table1['col3'], c3))
    print('OK')

    cc1 = list(range(10, 15))
    cc2 = list(range(10, 15))
    cc3 = ['l', 'm', 'n', 'o', 'p']

    # Set selected rows by size_t index
    table2 = flex.reflection_table()
    table2['col1'] = flex.int(cc1)
    table2['col2'] = flex.double(cc2)
    table2['col3'] = flex.std_string(cc3)

    index = flex.size_t([0, 1, 5, 8, 9])
    ccc1 = deepcopy(c1)
    ccc2 = deepcopy(c2)
    ccc3 = deepcopy(c3)
    for j, i in enumerate(index):
        ccc1[i] = cc1[j]
        ccc2[i] = cc2[j]
        ccc3[i] = cc3[j]
    table1.set_selected(index, table2)
    assert all(a == b for a, b in zip(table1['col1'], ccc1))
    assert all(a == b for a, b in zip(table1['col2'], ccc2))
    assert all(a == b for a, b in zip(table1['col3'], ccc3))
    print('OK')

    # Set selected rows by bool flags (True at the same rows as `index`,
    # so the expected values are identical)
    table2 = flex.reflection_table()
    table2['col1'] = flex.int(cc1)
    table2['col2'] = flex.double(cc2)
    table2['col3'] = flex.std_string(cc3)

    flags = flex.bool(
        [True, True, False, False, False, True, False, False, True, True])
    table1.set_selected(flags, table2)
    assert all(a == b for a, b in zip(table1['col1'], ccc1))
    assert all(a == b for a, b in zip(table1['col2'], ccc2))
    assert all(a == b for a, b in zip(table1['col3'], ccc3))
    print('OK')
def test_row_operations():
    """Exercise extend, append, insert, row access and row assignment."""
    # Mirror lists that track the expected content of each column.
    expected = {
        "col1": list(range(10)),
        "col2": list(range(10)),
        "col3": ["a", "b", "c", "d", "e", "f", "g", "i", "j", "k"],
    }

    table = flex.reflection_table()
    table["col1"] = flex.int(expected["col1"])
    table["col2"] = flex.double(expected["col2"])
    table["col3"] = flex.std_string(expected["col3"])

    def _verify(nrows):
        # The table must stay consistent and match the mirror lists exactly.
        assert table.nrows() == nrows
        assert table.ncols() == 3
        assert table.is_consistent()
        assert all(a == b for a, b in zip(table["col1"], expected["col1"]))
        assert all(a == b for a, b in zip(table["col2"], expected["col2"]))
        assert all(a == b for a, b in zip(table["col3"], expected["col3"]))

    # Extending the table with itself doubles every column.
    table.extend(table)
    for key in expected:
        expected[key] = expected[key] * 2
    _verify(20)

    # Appending a partial row default-fills the missing columns.
    table.append({"col1": 10})
    expected["col1"].append(10)
    expected["col2"].append(0)
    expected["col3"].append("")
    _verify(21)

    table.append({"col2": 11})
    expected["col1"].append(0)
    expected["col2"].append(11)
    expected["col3"].append("")
    _verify(22)

    # Appending a complete row.
    table.append({"col1": 12, "col2": 12, "col3": "l"})
    expected["col1"].append(12)
    expected["col2"].append(12)
    expected["col3"].append("l")
    _verify(23)

    # Inserting rows shifts later entries down; partial rows default-fill.
    table.insert(5, {"col1": -1})
    expected["col1"].insert(5, -1)
    expected["col2"].insert(5, 0)
    expected["col3"].insert(5, "")
    _verify(24)

    table.insert(2, {"col1": -2, "col2": -3, "col3": "abc"})
    expected["col1"].insert(2, -2)
    expected["col2"].insert(2, -3)
    expected["col3"].insert(2, "abc")
    _verify(25)

    # Indexing returns one row as a mapping of column name to value.
    for pos in range(table.nrows()):
        row = table[pos]
        assert row["col1"] == expected["col1"][pos]
        assert row["col2"] == expected["col2"][pos]
        assert row["col3"] == expected["col3"][pos]

    # Assigning a partial row leaves unnamed columns untouched.
    table[2] = {"col1": 100}
    assert table[2]["col1"] == 100
    assert table[2]["col2"] == expected["col2"][2]
    assert table[2]["col3"] == expected["col3"][2]

    # Assigning a complete row replaces every column value.
    table[10] = {"col1": 1000, "col2": 2000, "col3": "hello"}
    assert table[10]["col1"] == 1000
    assert table[10]["col2"] == 2000
    assert table[10]["col3"] == "hello"
def tst_resizing(self):
    """Test creation, resizing and clearing of a reflection table.

    Fixes: the original wrote ``assert (False)`` inside a
    ``try/except Exception: pass`` block, so the AssertionError meant to
    flag a missing exception was itself swallowed and the check could
    never fail — the must-raise check now uses an explicit flag asserted
    outside the try. Python-2-only ``print`` statements are also
    parenthesized (identical output on Python 2).
    """
    from dials.array_family import flex

    # Create a table with 2 empty columns
    table = flex.reflection_table()
    assert table.empty()
    table['col1'] = flex.int()
    table['col2'] = flex.double()
    assert table.nrows() == 0
    assert table.ncols() == 2
    assert not table.empty()
    assert 'col1' in table
    assert 'col2' in table
    assert 'col3' not in table
    print('OK')

    # Create a table with 2 columns and 10 rows
    table = flex.reflection_table()
    table['col1'] = flex.int(10)
    table['col2'] = flex.double(10)
    assert table.nrows() == 10
    assert table.ncols() == 2
    print('OK')

    # Adding an extra column with the wrong size must throw
    raised = False
    try:
        table['col3'] = flex.std_string(20)
    except Exception:
        raised = True
    assert raised, "expected wrong-length column assignment to raise"
    assert table.nrows() == 10
    assert table.ncols() == 2
    assert table.is_consistent()
    assert len(table['col1']) == 10
    assert len(table['col2']) == 10
    assert len(table) == table.size()
    print('OK')

    # Resize the table (should resize all columns)
    table.resize(50)
    assert table.nrows() == 50
    assert table.ncols() == 2
    assert table.is_consistent()
    assert len(table['col1']) == 50
    assert len(table['col2']) == 50
    print('OK')

    # Make the table inconsistent: nrows/ncols must then raise
    table['col1'].resize(40)
    assert not table.is_consistent()
    assert_exception(lambda: table.nrows())
    assert_exception(lambda: table.ncols())
    print('OK')

    # Clear the table
    table.clear()
    assert table.is_consistent()
    assert table.empty()
    assert table.nrows() == 0
    assert table.ncols() == 0
    print('OK')
def test_set_selected():
    """Exercise column-wise and row-wise set_selected on reflection tables."""
    base1 = list(range(10))
    base2 = list(range(10))
    base3 = ["a", "b", "c", "d", "e", "f", "g", "i", "j", "k"]

    # A source table carrying two columns the target does not yet have.
    source = flex.reflection_table()
    source["col2"] = flex.double(base2)
    source["col3"] = flex.std_string(base3)

    def _check(tbl, want1, want2, want3):
        assert all(a == b for a, b in zip(tbl["col1"], want1))
        assert all(a == b for a, b in zip(tbl["col2"], want2))
        assert all(a == b for a, b in zip(tbl["col3"], want3))

    # Copy whole columns across, selecting them by a tuple of names.
    target = flex.reflection_table()
    target["col1"] = flex.int(base1)
    target.set_selected(("col3", "col2"), source)
    assert target.nrows() == 10
    assert target.ncols() == 3
    _check(target, base1, base2, base3)

    # The column selection may equally be given as a flex.std_string.
    target = flex.reflection_table()
    target["col1"] = flex.int(base1)
    target.set_selected(flex.std_string(["col3", "col2"]), source)
    assert target.nrows() == 10
    assert target.ncols() == 3
    _check(target, base1, base2, base3)

    new1 = list(range(10, 15))
    new2 = list(range(10, 15))
    new3 = ["l", "m", "n", "o", "p"]

    # Overwrite a subset of rows, selected by size_t index.
    rows = flex.reflection_table()
    rows["col1"] = flex.int(new1)
    rows["col2"] = flex.double(new2)
    rows["col3"] = flex.std_string(new3)

    picks = flex.size_t([0, 1, 5, 8, 9])
    want1 = copy.deepcopy(base1)
    want2 = copy.deepcopy(base2)
    want3 = copy.deepcopy(base3)
    for src_pos, dst_pos in enumerate(picks):
        want1[dst_pos] = new1[src_pos]
        want2[dst_pos] = new2[src_pos]
        want3[dst_pos] = new3[src_pos]
    target.set_selected(picks, rows)
    _check(target, want1, want2, want3)

    # Repeating the assignment from an identical fresh table leaves the
    # target unchanged.
    rows = flex.reflection_table()
    rows["col1"] = flex.int(new1)
    rows["col2"] = flex.double(new2)
    rows["col3"] = flex.std_string(new3)
    target.set_selected(picks, rows)
    _check(target, want1, want2, want3)
def export_mtz(
    integrated_data,
    experiment_list,
    hklout,
    ignore_panels=False,
    include_partials=False,
    keep_partials=False,
    min_isigi=None,
    force_static_model=False,
    filter_ice_rings=False,
):
    """Export data from integrated_data corresponding to experiment_list to an
    MTZ file hklout.

    :param integrated_data: reflection table of integration results
    :param experiment_list: list containing exactly one experiment
    :param hklout: output MTZ filename
    :param ignore_panels: if True, skip the single-panel detector assertion
    :param include_partials: sum and scale partial reflections before export
    :param keep_partials: keep reflections with partiality < 0.99
    :param min_isigi: if set, remove reflections with I/sig(I) below this value
    :param force_static_model: ignore any scan-varying crystal model
    :param filter_ice_rings: remove reflections flagged as in powder/ice rings
    :return: the iotbx mtz object that was written to hklout
    """
    from dials.array_family import flex

    # for the moment assume (and assert) that we will convert data from exactly
    # one lattice...
    # FIXME allow for more than one experiment in here: this is fine just add
    # multiple MTZ data sets (DIALS1...DIALSN) and multiple batch headers: one
    # range of batches for each experiment
    assert len(experiment_list) == 1
    # select reflections that are assigned to an experiment (i.e. non-negative id)
    integrated_data = integrated_data.select(integrated_data["id"] >= 0)
    assert max(integrated_data["id"]) == 0

    # strip out negative variance reflections: these should not really be there
    # FIXME Doing select on summation results. Should do on profile result if
    # present? Yes
    # Keep only integrated reflections; require all integration flags when
    # profile-fitted results are present.
    if "intensity.prf.variance" in integrated_data:
        selection = integrated_data.get_flags(integrated_data.flags.integrated, all=True)
    else:
        selection = integrated_data.get_flags(integrated_data.flags.integrated_sum)
    integrated_data = integrated_data.select(selection)

    # remove summation-intensity reflections with non-positive variance
    selection = integrated_data["intensity.sum.variance"] <= 0
    if selection.count(True) > 0:
        integrated_data.del_selected(selection)
        logger.info("Removing %d reflections with negative variance" % selection.count(True))

    # likewise for profile-fitted intensities, when present
    if "intensity.prf.variance" in integrated_data:
        selection = integrated_data["intensity.prf.variance"] <= 0
        if selection.count(True) > 0:
            integrated_data.del_selected(selection)
            logger.info(
                "Removing %d profile reflections with negative variance"
                % selection.count(True)
            )

    if filter_ice_rings:
        selection = integrated_data.get_flags(integrated_data.flags.in_powder_ring)
        integrated_data.del_selected(selection)
        logger.info(
            "Removing %d reflections in ice ring resolutions" % selection.count(True)
        )

    # apply an I/sig(I) cutoff to summation (and, if present, profile) results
    if min_isigi is not None:
        selection = (
            integrated_data["intensity.sum.value"]
            / flex.sqrt(integrated_data["intensity.sum.variance"])
        ) < min_isigi
        integrated_data.del_selected(selection)
        logger.info(
            "Removing %d reflections with I/Sig(I) < %s"
            % (selection.count(True), min_isigi)
        )

        if "intensity.prf.variance" in integrated_data:
            selection = (
                integrated_data["intensity.prf.value"]
                / flex.sqrt(integrated_data["intensity.prf.variance"])
            ) < min_isigi
            integrated_data.del_selected(selection)
            logger.info(
                "Removing %d profile reflections with I/Sig(I) < %s"
                % (selection.count(True), min_isigi)
            )

    # FIXME in here work on including partial reflections => at this stage best
    # to split off the partial refections into a different selection & handle
    # gracefully... better to work on a short list as will need to "pop" them &
    # find matching parts to combine.
    if include_partials:
        integrated_data = sum_partial_reflections(integrated_data)
        integrated_data = scale_partial_reflections(integrated_data)

    if "partiality" in integrated_data:
        selection = integrated_data["partiality"] < 0.99
        if selection.count(True) > 0 and not keep_partials:
            integrated_data.del_selected(selection)
            logger.info("Removing %d incomplete reflections" % selection.count(True))

    # FIXME TODO for more than one experiment into an MTZ file:
    #
    # - add an epoch (or recover an epoch) from the scan and add this as an extra
    #   column to the MTZ file for scaling, so we know that the two lattices were
    #   integrated at the same time
    # - decide a sensible BATCH increment to apply to the BATCH value between
    #   experiments and add this
    #
    # At the moment this is probably enough to be working on.

    experiment = experiment_list[0]

    # also only work with one panel (for the moment)
    if not ignore_panels:
        assert len(experiment.detector) == 1

    from scitbx import matrix

    if experiment.goniometer:
        axis = matrix.col(experiment.goniometer.get_rotation_axis())
    else:
        axis = 0.0, 0.0, 0.0
    s0 = experiment.beam.get_s0()
    wavelength = experiment.beam.get_wavelength()

    panel = experiment.detector[0]
    origin = matrix.col(panel.get_origin())
    fast = matrix.col(panel.get_fast_axis())
    slow = matrix.col(panel.get_slow_axis())

    # scale the panel axes by the pixel size so they are in mm per pixel
    pixel_size = panel.get_pixel_size()
    fast *= pixel_size[0]
    slow *= pixel_size[1]

    # move the crystal to the reference setting of its space group so the
    # indices written out below are in a standard setting
    cb_op_to_ref = (
        experiment.crystal.get_space_group().info().change_of_basis_op_to_reference_setting()
    )

    experiment.crystal = experiment.crystal.change_basis(cb_op_to_ref)

    U = experiment.crystal.get_U()
    if experiment.goniometer is not None:
        F = matrix.sqr(experiment.goniometer.get_fixed_rotation())
    else:
        F = matrix.sqr((1, 0, 0, 0, 1, 0, 0, 0, 1))
    unit_cell = experiment.crystal.get_unit_cell()

    from iotbx import mtz

    from scitbx.array_family import flex
    from math import floor, sqrt

    m = mtz.object()
    m.set_title("from dials.export_mtz")
    m.set_space_group_info(experiment.crystal.get_space_group().info())

    if experiment.scan:
        image_range = experiment.scan.get_image_range()
    else:
        image_range = 1, 1

    # pointless (at least) doesn't like batches starting from zero
    b_incr = max(image_range[0], 1)

    # write one batch header per image in the scan
    for b in range(image_range[0], image_range[1] + 1):
        o = m.add_batch().set_num(b + b_incr).set_nbsetid(1).set_ncryst(1)
        o.set_time1(0.0).set_time2(0.0).set_title("Batch %d" % (b + b_incr))
        o.set_ndet(1).set_theta(flex.float((0.0, 0.0))).set_lbmflg(0)
        o.set_alambd(wavelength).set_delamb(0.0).set_delcor(0.0)
        o.set_divhd(0.0).set_divvd(0.0)

        # FIXME hard-coded assumption on idealized beam vector below... this may be
        # broken when we come to process data from a non-imgCIF frame
        o.set_so(flex.float(s0)).set_source(flex.float((0, 0, -1)))

        # these are probably 0, 1 respectively, also flags for how many are set, sd
        o.set_bbfac(0.0).set_bscale(1.0)
        o.set_sdbfac(0.0).set_sdbscale(0.0).set_nbscal(0)

        # unit cell (this is fine) and the what-was-refined-flags FIXME hardcoded

        # take time-varying parameters from the *end of the frame* unlikely to
        # be much different at the end - however only exist if time-varying refinement
        # was used
        if not force_static_model and experiment.crystal.num_scan_points > 0:
            _unit_cell = experiment.crystal.get_unit_cell_at_scan_point(b - image_range[0])
            _U = experiment.crystal.get_U_at_scan_point(b - image_range[0])
        else:
            _unit_cell = unit_cell
            _U = U

        # apply the fixed rotation to this to unify matrix definitions - F * U
        # was what was used in the actual prediction: U appears to be stored
        # as the transpose?! At least is for Mosflm...
        #
        # FIXME Do we need to apply the setting rotation here somehow? i.e. we have
        # the U.B. matrix assuming that the axis is equal to S * axis_datum but
        # here we are just giving the effective axis so at scan angle 0 this will
        # not be correct... FIXME 2 not even sure we can express the stack of
        # matrices S * R * F * U * B in MTZ format?...
        _U = dials_u_to_mosflm(F * _U, _unit_cell)

        # FIXME need to get what was refined and what was constrained from the
        # crystal model
        o.set_cell(flex.float(_unit_cell.parameters()))
        o.set_lbcell(flex.int((-1, -1, -1, -1, -1, -1)))
        o.set_umat(flex.float(_U.transpose().elems))

        # get the mosaic spread though today it may not actually be set
        mosaic = experiment.crystal.get_mosaicity()
        o.set_crydat(
            flex.float([mosaic, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        )

        o.set_lcrflg(0)
        o.set_datum(flex.float((0.0, 0.0, 0.0)))

        # detector size, distance
        o.set_detlm(
            flex.float(
                [0.0, panel.get_image_size()[0], 0.0, panel.get_image_size()[1], 0, 0, 0, 0]
            )
        )
        o.set_dx(flex.float([panel.get_directed_distance(), 0.0]))

        # goniometer axes and names, and scan axis number, and number of axes, missets
        o.set_e1(flex.float(axis))
        o.set_e2(flex.float((0.0, 0.0, 0.0)))
        o.set_e3(flex.float((0.0, 0.0, 0.0)))
        o.set_gonlab(flex.std_string(("AXIS", "", "")))
        o.set_jsaxs(1)
        o.set_ngonax(1)
        o.set_phixyz(flex.float((0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))

        # scan ranges, axis
        if experiment.scan:
            phi_start, phi_range = experiment.scan.get_image_oscillation(b)
        else:
            phi_start, phi_range = 0.0, 0.0

        o.set_phistt(phi_start)
        o.set_phirange(phi_range)
        o.set_phiend(phi_start + phi_range)
        o.set_scanax(flex.float(axis))

        # number of misorientation angles
        o.set_misflg(0)

        # crystal axis closest to rotation axis (why do I want this?)
        o.set_jumpax(0)

        # type of data - 1; 2D, 2; 3D, 3; Laue
        o.set_ldtype(2)

    # now create the actual data structures - first keep a track of the columns
    # H K L M/ISYM BATCH I SIGI IPR SIGIPR FRACTIONCALC XDET YDET ROT WIDTH
    # LP MPART FLAG BGPKRATIOS

    from cctbx.array_family import flex as cflex  # implicit import
    from cctbx.miller import map_to_asu_isym  # implicit import

    # gather the required information for the reflection file
    nref = len(integrated_data["miller_index"])
    x_px, y_px, z_px = integrated_data["xyzcal.px"].parts()
    xdet = flex.double(x_px)
    ydet = flex.double(y_px)
    zdet = flex.double(z_px)

    # compute ROT values
    if experiment.scan:
        rot = flex.double([experiment.scan.get_angle_from_image_index(z) for z in zdet])
    else:
        rot = zdet

    # compute BATCH values
    batch = flex.floor(zdet).iround() + 1 + b_incr

    # we're working with full reflections so...
    fractioncalc = flex.double(nref, 1.0)

    # now go for it and make an MTZ file...
    x = m.add_crystal("XTAL", "DIALS", unit_cell.parameters())
    d = x.add_dataset("FROMDIALS", wavelength)

    # now add column information...
    # FIXME add DIALS_FLAG which can include e.g. was partial etc.
    # map of column name -> single-character MTZ column type
    type_table = {
        "H": "H",
        "K": "H",
        "L": "H",
        "I": "J",
        "SIGI": "Q",
        "IPR": "J",
        "SIGIPR": "Q",
        "BG": "R",
        "SIGBG": "R",
        "XDET": "R",
        "YDET": "R",
        "BATCH": "B",
        "BGPKRATIOS": "R",
        "WIDTH": "R",
        "MPART": "I",
        "M_ISYM": "Y",
        "FLAG": "I",
        "LP": "R",
        "FRACTIONCALC": "R",
        "ROT": "R",
        "DQE": "R",
    }

    # derive index columns from original indices with
    #
    # from m.replace_original_index_miller_indices
    #
    # so all that is needed now is to make space for the reflections - fill with
    # zeros...
    m.adjust_column_array_sizes(nref)
    m.set_n_reflections(nref)

    # assign H, K, L, M_ISYM space
    for column in "H", "K", "L", "M_ISYM":
        d.add_column(column, type_table[column]).set_values(
            flex.double(nref, 0.0).as_float()
        )

    m.replace_original_index_miller_indices(
        cb_op_to_ref.apply(integrated_data["miller_index"])
    )

    d.add_column("BATCH", type_table["BATCH"]).set_values(batch.as_double().as_float())

    # LP and DQE corrections default to 1.0 when absent from the table
    if "lp" in integrated_data:
        lp = integrated_data["lp"]
    else:
        lp = flex.double(nref, 1.0)
    if "dqe" in integrated_data:
        dqe = integrated_data["dqe"]
    else:
        dqe = flex.double(nref, 1.0)

    I_profile = None
    V_profile = None
    I_sum = None
    V_sum = None

    # FIXME errors in e.g. LP correction need to be propogated here
    scl = lp / dqe

    if "intensity.prf.value" in integrated_data:
        I_profile = integrated_data["intensity.prf.value"] * scl
        V_profile = integrated_data["intensity.prf.variance"] * scl * scl
        # Trap negative variances
        assert V_profile.all_gt(0)
        d.add_column("IPR", type_table["I"]).set_values(I_profile.as_float())
        d.add_column("SIGIPR", type_table["SIGI"]).set_values(
            flex.sqrt(V_profile).as_float()
        )
    if "intensity.sum.value" in integrated_data:
        I_sum = integrated_data["intensity.sum.value"] * scl
        V_sum = integrated_data["intensity.sum.variance"] * scl * scl
        # Trap negative variances
        assert V_sum.all_gt(0)
        d.add_column("I", type_table["I"]).set_values(I_sum.as_float())
        d.add_column("SIGI", type_table["SIGI"]).set_values(flex.sqrt(V_sum).as_float())
    if (
        "background.sum.value" in integrated_data
        and "background.sum.variance" in integrated_data
    ):
        bg = integrated_data["background.sum.value"]
        varbg = integrated_data["background.sum.variance"]
        assert (varbg >= 0).count(False) == 0
        sigbg = flex.sqrt(varbg)
        d.add_column("BG", type_table["BG"]).set_values(bg.as_float())
        d.add_column("SIGBG", type_table["SIGBG"]).set_values(sigbg.as_float())

    d.add_column("FRACTIONCALC", type_table["FRACTIONCALC"]).set_values(
        fractioncalc.as_float()
    )

    d.add_column("XDET", type_table["XDET"]).set_values(xdet.as_float())
    d.add_column("YDET", type_table["YDET"]).set_values(ydet.as_float())
    d.add_column("ROT", type_table["ROT"]).set_values(rot.as_float())
    d.add_column("LP", type_table["LP"]).set_values(lp.as_float())
    d.add_column("DQE", type_table["DQE"]).set_values(dqe.as_float())

    m.write(hklout)

    return m
def export_mtz(observed_hkls, experiment, filename):
    """Write the predicted/observed reflections of a strategy run to an MTZ
    file, one record per observation.

    NOTE(review): this redefines the name ``export_mtz``; if the earlier
    definition lives in the same module only this one survives - confirm the
    shadowing is intentional.

    :param observed_hkls: mapping of miller index -> number of observations
    :param experiment: experiment supplying beam, goniometer, detector and
        crystal models
    :param filename: output MTZ filename
    :return: the iotbx mtz object that was written to filename
    """
    if experiment.goniometer:
        axis = experiment.goniometer.get_rotation_axis()
    else:
        axis = 0.0, 0.0, 0.0
    s0 = experiment.beam.get_s0()
    wavelength = experiment.beam.get_wavelength()

    panel = experiment.detector[0]

    # move the crystal to the reference setting of its space group so the
    # indices written out below are in a standard setting
    cb_op_to_ref = (
        experiment.crystal.get_space_group().info().change_of_basis_op_to_reference_setting()
    )
    experiment.crystal = experiment.crystal.change_basis(cb_op_to_ref)

    from iotbx import mtz
    from scitbx.array_family import flex

    m = mtz.object()
    m.set_title("from dials.scratch.mg.strategy_i19")
    m.set_space_group_info(experiment.crystal.get_space_group().info())

    # .values()/.items() work on both Python 2 and 3; the original used the
    # Python-2-only itervalues()/iteritems()
    nrefcount = sum(observed_hkls.values())
    nref = max(observed_hkls.values())

    # These are loop-invariant, and unit_cell is needed again for the crystal
    # record below (the original referenced an undefined name `unit_cell`
    # there, a guaranteed NameError).
    unit_cell = experiment.crystal.get_unit_cell()
    U = experiment.crystal.get_U()
    mosaic = experiment.crystal.get_mosaicity()

    # one batch header per (maximum) observation multiplicity
    for batch in range(1, nref + 1):
        o = m.add_batch().set_num(batch).set_nbsetid(1).set_ncryst(1)
        o.set_time1(0.0).set_time2(0.0).set_title("Batch %d" % batch)
        o.set_ndet(1).set_theta(flex.float((0.0, 0.0))).set_lbmflg(0)
        o.set_alambd(wavelength).set_delamb(0.0).set_delcor(0.0)
        o.set_divhd(0.0).set_divvd(0.0)
        o.set_so(flex.float(s0)).set_source(flex.float((0, 0, -1)))
        o.set_bbfac(0.0).set_bscale(1.0)
        o.set_sdbfac(0.0).set_sdbscale(0.0).set_nbscal(0)
        o.set_cell(flex.float(unit_cell.parameters()))
        o.set_lbcell(flex.int((-1, -1, -1, -1, -1, -1)))
        o.set_umat(flex.float(U.transpose().elems))
        o.set_crydat(
            flex.float([mosaic, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        )
        o.set_lcrflg(0)
        o.set_datum(flex.float((0.0, 0.0, 0.0)))
        # detector size, distance
        o.set_detlm(
            flex.float(
                [0.0, panel.get_image_size()[0], 0.0, panel.get_image_size()[1], 0, 0, 0, 0]
            )
        )
        o.set_dx(flex.float([panel.get_directed_distance(), 0.0]))
        # goniometer axes and names, and scan axis number, and number of axes, missets
        o.set_e1(flex.float(axis))
        o.set_e2(flex.float((0.0, 0.0, 0.0)))
        o.set_e3(flex.float((0.0, 0.0, 0.0)))
        o.set_gonlab(flex.std_string(("AXIS", "", "")))
        o.set_jsaxs(1)
        o.set_ngonax(1)
        o.set_phixyz(flex.float((0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
        # still images: no oscillation
        phi_start, phi_range = 0.0, 0.0
        o.set_phistt(phi_start)
        o.set_phirange(phi_range)
        o.set_phiend(phi_start + phi_range)
        o.set_scanax(flex.float(axis))
        # number of misorientation angles
        o.set_misflg(0)
        # crystal axis closest to rotation axis (why do I want this?)
        o.set_jumpax(0)
        # type of data - 1; 2D, 2; 3D, 3; Laue
        o.set_ldtype(2)

    from cctbx.array_family import flex as cflex  # implicit import

    # now go for it and make an MTZ file...
    x = m.add_crystal("XTAL", "DIALS", unit_cell.parameters())
    d = x.add_dataset("FROMDIALS", wavelength)

    # column name -> single-character MTZ column type
    type_table = {
        "IPR": "J",
        "BGPKRATIOS": "R",
        "WIDTH": "R",
        "I": "J",
        "H": "H",
        "K": "H",
        "MPART": "I",
        "L": "H",
        "BATCH": "B",
        "M_ISYM": "Y",
        "SIGI": "Q",
        "FLAG": "I",
        "XDET": "R",
        "LP": "R",
        "YDET": "R",
        "SIGIPR": "Q",
        "FRACTIONCALC": "R",
        "ROT": "R",
    }

    m.adjust_column_array_sizes(nrefcount)
    m.set_n_reflections(nrefcount)

    # assign H, K, L, M_ISYM space
    for column in "H", "K", "L", "M_ISYM":
        d.add_column(column, type_table[column]).set_values(flex.float(nrefcount, 0.0))

    # batch number b for the b'th observation of each reflection; materialized
    # as a list because flex.float does not accept a generator expression
    batchnums = [b for (hkl, n) in observed_hkls.items() for b in range(1, n + 1)]
    d.add_column("BATCH", type_table["BATCH"]).set_values(flex.float(batchnums))

    # FIXME 3.0 looks like a placeholder fraction - confirm before relying on it
    d.add_column("FRACTIONCALC", type_table["FRACTIONCALC"]).set_values(
        flex.float(nrefcount, 3.0)
    )

    # one index entry per observation, iterating observed_hkls in the same
    # order as the BATCH numbers above so the two columns stay aligned
    m.replace_original_index_miller_indices(
        cb_op_to_ref.apply(
            cflex.miller_index(
                [hkl for (hkl, n) in observed_hkls.items() for _ in range(n)]
            )
        )
    )

    m.write(filename)

    return m