def vertices_and_indices():
    icosahedron = mesh.Mesh.from_file(output('subdivided-5.stl'))
    vertices = OrderedSet()
    for face in icosahedron:
        v1 = face[0:3]
        v2 = face[3:6]
        v3 = face[6:9]
        vertices.add(tuple(v1))
        vertices.add(tuple(v2))
        vertices.add(tuple(v3))
    with open(output("vertices"), 'w') as vertices_output:
        for vertex in vertices:
            theta, phi = spherical(vertex)
            vertices_output.write(f"{theta}, {phi},\n")
    with open(output("indices"), 'w') as indices_output:
        for face in icosahedron:
            v1 = vertices.index(tuple(face[0:3]))
            v2 = vertices.index(tuple(face[3:6]))
            v3 = vertices.index(tuple(face[6:9]))
            indices_output.write(f"{v1}, {v2}, {v3},\n")
    return vertices
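The snippet above relies on OrderedSet to deduplicate vertices while preserving first-insertion order, so that .index() returns stable indices for the index buffer. A minimal sketch of that behaviour, assuming the third-party ordered-set package:

from ordered_set import OrderedSet

vertices = OrderedSet()
vertices.add((0.0, 0.0, 1.0))
vertices.add((1.0, 0.0, 0.0))
vertices.add((0.0, 0.0, 1.0))  # duplicate: silently ignored

assert list(vertices) == [(0.0, 0.0, 1.0), (1.0, 0.0, 0.0)]
assert vertices.index((1.0, 0.0, 0.0)) == 1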
def buildDataset(cls, all_results: Dataset, testie: Testie) -> List[tuple]:
    dtype = testie.variables.dtype()
    y = OrderedDict()
    dataset = []
    for i, (run, results_types) in enumerate(all_results.items()):
        vars = list(run.variables.values())
        if results_types is not None and len(results_types) > 0:
            dataset.append(vars)
            for result_type, results in results_types.items():
                r = np.mean(results)
                y.setdefault(result_type, []).append(r)
    dtype['values'] = [None] * len(dtype['formats'])
    for i, f in enumerate(dtype['formats']):
        if f is str:
            # Encode string variables as integer codes in first-seen order.
            dtype['formats'][i] = int
            values = OrderedSet()
            for row in dataset:
                values.add(row[i])
                row[i] = values.index(row[i])
            dtype['values'][i] = list(values)
    X = np.array(dataset, ndmin=2)
    lset = []
    for result_type, v in y.items():
        lset.append((result_type, X, np.array(v), dtype))
    return lset
def __Dedupe(items, key=None):
    seen = OrderedSet()
    num_seen = list()
    gn_item = (item for item in items)
    while True:
        try:
            item = next(gn_item)  # Python 3 protocol (was gn_item.next())
        except StopIteration:
            # Input exhausted: emit the per-item occurrence counts and stop.
            yield (None, num_seen)
            break
        else:
            val = item if key is None else key(item)
            if val not in seen:
                yield (item, None)
                seen.add(val)
                num_seen.append(1)
            else:
                num_seen[seen.index(val)] += 1
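A short usage sketch of the generator above: each first occurrence is yielded as (item, None), and once the input is exhausted a final (None, counts) pair carries the occurrence counts in first-seen order.

results = list(__Dedupe(['a', 'b', 'a', 'a', 'c']))
# First occurrences, in order of appearance.
assert [r[0] for r in results[:-1]] == ['a', 'b', 'c']
# Final yield: 'a' was seen 3 times, 'b' and 'c' once each.
assert results[-1] == (None, [3, 1, 1])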
def vectorize(sentence):
    # Set of unique words in the whole document.
    unique_words = OrderedSet()
    for sent in sentence:
        for word in sent:
            unique_words.add(word)
    # Convert the set to a list to make it easier to work with.
    unique_words = list(unique_words)
    # print(unique_words, len(unique_words))

    # A list of lists that contains the vectorized form of each sentence in the document.
    vector = list()
    # In the vectorized representation we consider the bag of words (the unique words
    # in the text), then count the occurrence of each word in a sentence and represent
    # it as a vector whose length equals len(unique_words). For example:
    #   sent1 = "i am a boy"
    #   sent2 = "i am a girl"
    #   unique_words = ["i", "am", "a", "boy", "girl"]
    #   vector representation of sent1 = [1, 1, 1, 1, 0]
    #   vector representation of sent2 = [1, 1, 1, 0, 1]
    for sent in sentence:  # iterate over every sentence in the document
        # Temporary vector counting the occurrences of each word in this sentence.
        temp_vector = [0] * len(unique_words)
        for word in sent:  # iterate over every word in the sentence
            temp_vector[unique_words.index(word)] += 1
        # Add the temporary vector to the list of per-sentence vectors.
        vector.append(temp_vector)
    # print(vector)
    return vector, unique_words
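The example from the comments can be run directly, since vectorize expects pre-tokenized sentences (lists of words):

sents = [["i", "am", "a", "boy"], ["i", "am", "a", "girl"]]
vectors, vocab = vectorize(sents)
assert vocab == ["i", "am", "a", "boy", "girl"]
assert vectors == [[1, 1, 1, 1, 0], [1, 1, 1, 0, 1]]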
def buildDataset(cls, all_results: Dataset, testie: Testie):
    dtype = testie.variables.dtype()
    y = []
    dataset = []
    for i, (run, results) in enumerate(all_results.items()):
        vars = list(run.variables.values())
        if results is not None:
            dataset.append(vars)
            y.append(np.mean(results))
    for i, f in enumerate(dtype['formats']):
        if f is str:
            # Encode string variables as integer codes in first-seen order.
            dtype['formats'][i] = int
            values = OrderedSet()
            for row in dataset:
                values.add(row[i])
                row[i] = values.index(row[i])
    X = np.array(dataset, ndmin=2)
    return X, np.array(y, dtype=[('result', float)])
def update_imageset_ids(experiments, reflections):
    """For a list of input experiments and reflections (each containing one sweep),
    update or add the imageset_id column to the data to match the order in the
    experiment list. This means that when the reflection tables are combined,
    the data is correct.
    """
    # Input is a list of ordered, matching experiments and reflection tables.
    next_iset_id = 0
    imagesets_found = OrderedSet()
    for expt, table in zip(experiments, reflections):
        if "imageset_id" in table:
            assert len(set(table["imageset_id"])) == 1
        iset = expt.imageset
        if iset not in imagesets_found:
            imagesets_found.add(iset)
            table["imageset_id"] = flex.int(table.size(), next_iset_id)
            next_iset_id += 1
        else:
            iset_id = imagesets_found.index(iset)
            table["imageset_id"] = flex.int(table.size(), iset_id)
    return reflections
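The same first-seen bookkeeping, stripped of the dials/flex types, looks like this (a plain-Python sketch in which strings stand in for imageset objects and a list for the flex.int column):

from ordered_set import OrderedSet

imagesets = ["iset_a", "iset_b", "iset_a"]  # third sweep shares iset_a
found = OrderedSet()
ids = []
for iset in imagesets:
    found.add(iset)               # no-op if already present
    ids.append(found.index(iset))
assert ids == [0, 1, 0]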
def run_integration(params, experiments, reference=None):
    """Perform the integration.

    Returns:
        experiments: The integrated experiments
        reflections: The integrated reflections
        report (optional): An integration report.

    Raises:
        ValueError: For a number of bad inputs
        RuntimeError: If the profile model creation fails
    """
    predicted = None
    rubbish = None

    for abs_params in params.absorption_correction:
        if abs_params.apply:
            if not (
                params.integration.debug.output
                and not params.integration.debug.separate_files
            ):
                raise ValueError(
                    "Shoeboxes must be saved to integration intermediates to apply an absorption correction. "
                    + "Set integration.debug.output=True, integration.debug.separate_files=False and "
                    + "integration.debug.delete_shoeboxes=True to temporarily store shoeboxes."
                )

    # Print if we're using a mask
    for i, exp in enumerate(experiments):
        mask = exp.imageset.external_lookup.mask
        if mask.filename is not None:
            if mask.data:
                logger.info("Using external mask: %s", mask.filename)
                for tile in mask.data:
                    logger.info(" Mask has %d pixels masked", tile.data().count(False))

    # Print the experimental models
    for i, exp in enumerate(experiments):
        summary = "\n".join(
            (
                "",
                "=" * 80,
                "",
                "Experiments",
                "",
                "Models for experiment %d" % i,
                "",
                str(exp.beam),
                str(exp.detector),
            )
        )
        if exp.goniometer:
            summary += str(exp.goniometer) + "\n"
        if exp.scan:
            summary += str(exp.scan) + "\n"
        summary += str(exp.crystal)
        logger.info(summary)

    logger.info("\n".join(("", "=" * 80, "")))
    logger.info(heading("Initialising"))

    # Load the data
    if reference:
        reference, rubbish = process_reference(reference)

        # Check pixels don't belong to neighbours
        if exp.goniometer is not None and exp.scan is not None:
            reference = filter_reference_pixels(reference, experiments)

    # Modify experiment list if scan range is set.
    experiments, reference = split_for_scan_range(
        experiments, reference, params.scan_range
    )

    # Modify experiment list if exclude images is set
    if params.exclude_images:
        for experiment in experiments:
            for index in params.exclude_images:
                experiment.imageset.mark_for_rejection(index, True)

    # Predict the reflections
    logger.info("\n".join(("", "=" * 80, "")))
    logger.info(heading("Predicting reflections"))
    predicted = flex.reflection_table.from_predictions_multi(
        experiments,
        dmin=params.prediction.d_min,
        dmax=params.prediction.d_max,
        margin=params.prediction.margin,
        force_static=params.prediction.force_static,
        padding=params.prediction.padding,
    )
    isets = OrderedSet(e.imageset for e in experiments)
    predicted["imageset_id"] = flex.int(predicted.size(), 0)
    if len(isets) > 1:
        for e in experiments:
            iset_id = isets.index(e.imageset)
            for id_ in predicted.experiment_identifiers().keys():
                identifier = predicted.experiment_identifiers()[id_]
                if identifier == e.identifier:
                    sel = predicted["id"] == id_
                    predicted["imageset_id"].set_selected(sel, iset_id)
                    break

    # Match reference with predicted
    if reference:
        matched, reference, unmatched = predicted.match_with_reference(reference)
        assert len(matched) == len(predicted)
        assert matched.count(True) <= len(reference)
        if matched.count(True) == 0:
            raise ValueError(
                """
Invalid input for reference reflections.
Zero reference spots were matched to predictions
"""
            )
        elif unmatched:
            msg = (
                "Warning: %d reference spots were not matched to predictions"
                % unmatched.size()
            )
            border = "\n".join(("", "*" * 80, ""))
            logger.info("".join((border, msg, border)))
            rubbish.extend(unmatched)

        if len(experiments) > 1:
            # filter out any experiments without matched reference reflections
            # f_: filtered
            f_reference = flex.reflection_table()
            f_predicted = flex.reflection_table()
            f_rubbish = flex.reflection_table()
            f_experiments = ExperimentList()
            good_expt_count = 0

            def refl_extend(src, dest, eid):
                old_id = eid
                new_id = good_expt_count
                tmp = src.select(src["id"] == old_id)
                tmp["id"] = flex.int(len(tmp), good_expt_count)
                if old_id in tmp.experiment_identifiers():
                    identifier = tmp.experiment_identifiers()[old_id]
                    del tmp.experiment_identifiers()[old_id]
                    tmp.experiment_identifiers()[new_id] = identifier
                dest.extend(tmp)

            for expt_id, experiment in enumerate(experiments):
                if len(reference.select(reference["id"] == expt_id)) != 0:
                    refl_extend(reference, f_reference, expt_id)
                    refl_extend(predicted, f_predicted, expt_id)
                    refl_extend(rubbish, f_rubbish, expt_id)
                    f_experiments.append(experiment)
                    good_expt_count += 1
                else:
                    logger.info(
                        "Removing experiment %d: no reference reflections matched to predictions",
                        expt_id,
                    )

            reference = f_reference
            predicted = f_predicted
            experiments = f_experiments
            rubbish = f_rubbish

    # Select a random sample of the predicted reflections
    if not params.sampling.integrate_all_reflections:
        predicted = sample_predictions(experiments, predicted, params)

    # Compute the profile model - either load existing or compute
    # can raise RuntimeError
    experiments = ProfileModelFactory.create(params, experiments, reference)
    for expr in experiments:
        if expr.profile is None:
            raise ValueError("No profile information in experiment list")
    del reference

    # Compute the bounding box
    predicted.compute_bbox(experiments)

    # Create the integrator
    integrator = create_integrator(params, experiments, predicted)

    # Integrate the reflections
    reflections = integrator.integrate()

    # Remove unintegrated reflections
    if not params.output.output_unintegrated_reflections:
        keep = reflections.get_flags(reflections.flags.integrated, all=False)
        logger.info(
            "Removing %d unintegrated reflections of %d total",
            keep.count(False),
            keep.size(),
        )
        reflections = reflections.select(keep)

    # Append rubbish data onto the end
    if rubbish is not None and params.output.include_bad_reference:
        mask = flex.bool(len(rubbish), True)
        rubbish.unset_flags(mask, rubbish.flags.integrated_sum)
        rubbish.unset_flags(mask, rubbish.flags.integrated_prf)
        rubbish.set_flags(mask, rubbish.flags.bad_reference)
        reflections.extend(rubbish)

    # Correct integrated intensities for absorption, if necessary
    for abs_params in params.absorption_correction:
        if abs_params.apply and abs_params.algorithm == "fuller_kapton":
            from dials.algorithms.integration.kapton_correction import (
                multi_kapton_correction,
            )

            experiments, reflections = multi_kapton_correction(
                experiments, reflections, abs_params.fuller_kapton, logger=logger
            )()

    if params.significance_filter.enable:
        from dials.algorithms.integration.stills_significance_filter import (
            SignificanceFilter,
        )

        sig_filter = SignificanceFilter(params)
        filtered_refls = sig_filter(experiments, reflections)
        accepted_expts = ExperimentList()
        accepted_refls = flex.reflection_table()
        logger.info(
            "Removed %d reflections out of %d when applying significance filter",
            (reflections.size() - filtered_refls.size()),
            reflections.size(),
        )
        for expt_id, expt in enumerate(experiments):
            refls = filtered_refls.select(filtered_refls["id"] == expt_id)
            if refls:
                accepted_expts.append(expt)
                current_id = expt_id
                new_id = len(accepted_expts) - 1
                refls["id"] = flex.int(len(refls), new_id)
                if expt.identifier:
                    del refls.experiment_identifiers()[current_id]
                    refls.experiment_identifiers()[new_id] = expt.identifier
                accepted_refls.extend(refls)
            else:
                logger.info(
                    "Removed experiment %d which has no reflections left after applying significance filter",
                    expt_id,
                )

        if not accepted_refls:
            raise ValueError("No reflections left after applying significance filter")
        experiments = accepted_expts
        reflections = accepted_refls

    # Write a report if requested
    report = None
    if params.output.report is not None:
        report = integrator.report()

    return experiments, reflections, report
# Acquire new column names
new_cols = OrderedSet()
for col in df:
    for val in df[col]:
        title = col + "_" + val
        new_cols.add(title)

all_new_rows = []
for idx, row in df.iterrows():
    new_row = [0] * len(new_cols)
    row_dict = row.to_dict()
    for key in row_dict:
        val = key + "_" + row_dict[key]
        match_idx = new_cols.index(val)
        new_row[match_idx] = 1
    all_new_rows.append(new_row)

data = np.array(all_new_rows)
new_df = pd.DataFrame(data=data, columns=new_cols)

input_dataset = np.matrix(new_df.loc[:, 'cap-shape_x':])
output_dataset = np.matrix(new_df.loc[:, 'class_p':'class_p'])
input_dataset = input_dataset[:][:1000]
output_dataset = output_dataset[:][:1000]
normed_input = (input_dataset - input_dataset.mean()) / input_dataset.std()
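A tiny, hypothetical frame (mushroom-style data is assumed, given the 'class_p' and 'cap-shape_x' column names above) shows the one-hot column names the first loop produces:

from ordered_set import OrderedSet
import pandas as pd

df = pd.DataFrame({"class": ["p", "e"], "cap-shape": ["x", "b"]})
new_cols = OrderedSet()
for col in df:
    for val in df[col]:
        new_cols.add(col + "_" + val)
assert list(new_cols) == ["class_p", "class_e", "cap-shape_x", "cap-shape_b"]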
class TfidfVectorizer():

    def __init__(self):
        self.word_ordered_set = OrderedSet()
        self.dictionary = {}
        self.dictionary_len = len(self.dictionary)

    def fit(self, raw_documents):
        """Build (or extend) the dictionary held by the class.

        Parameters
        ------------
        raw_documents : iterable
        """
        for text in raw_documents:
            words = word_tokenizer(text)
            for word in words:
                self.word_ordered_set.add(word)
        for word in self.word_ordered_set:
            self.dictionary[word] = self.word_ordered_set.index(word)
        self.dictionary_len = len(self.dictionary)

    def fit_transform(self, raw_documents):
        self.fit(raw_documents)
        return self.transform(raw_documents)

    def transform(self, raw_documents):
        """
        Parameters
        ------------
        raw_documents : iterable of string

        Returns
        -----------
        df_idf_mat : the TF-IDF matrix; one row per document, one column per unique word
        """
        # Compute the matrix dimensions (the extra column collects out-of-vocabulary words).
        row_d = len(raw_documents)
        column_d = self.dictionary_len + 1
        # print(row_d, column_d)
        # Initialise the matrices.
        num_mat = np.zeros((row_d, column_d))
        df_mat = np.zeros((row_d, column_d))
        idf_mat = np.zeros((1, column_d))
        # Fill num_mat: num_mat[i][j] is the number of times word j occurs in document i.
        for row_index, row in enumerate(raw_documents):
            words = word_tokenizer(row)
            for word in words:
                if word in self.dictionary:
                    num_mat[row_index][self.dictionary[word]] += 1
                else:
                    num_mat[row_index][self.dictionary_len] = 1
        # Compute the term-frequency matrix.
        df_mat = num_mat / num_mat.sum(axis=1).reshape(row_d, 1)
        # Compute the inverse document frequencies.
        count_mat = num_mat
        count_mat[count_mat != 0] = 1
        idf_mat = np.log(row_d / (count_mat.sum(axis=0) + 1).reshape(1, column_d))
        # Combine df_mat and idf_mat into the TF-IDF matrix.
        df_idf_mat = df_mat * idf_mat
        return df_idf_mat
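Hypothetical usage of the class above, assuming word_tokenizer (not defined in this snippet) is a simple whitespace tokenizer:

docs = ["the cat sat", "the dog sat"]
vec = TfidfVectorizer()
mat = vec.fit_transform(docs)
# One row per document; the extra final column collects out-of-vocabulary words.
assert mat.shape == (2, len(vec.dictionary) + 1)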
class CountVectorizer():
    """
    Parameters
    -----------
    """

    def __init__(self):
        # Lookup table for synonyms: maps strings with the same meaning
        # onto a single canonical string.
        self.lookup_table = {}
        # TODO: to be completed
        self.comfused_words_set = set()
        self.not_comfused_words_set = set()
        self.word_ordered_set = OrderedSet()
        self.dictionary = {}
        self.dictionary_len = len(self.dictionary)

    def fit(self, raw_documents):
        """Build (or extend) the dictionary held by the class.

        Parameters
        ------------
        raw_documents : iterable
        """
        for text in raw_documents:
            words = word_tokenizer(text)
            for word in words:
                self.word_ordered_set.add(word)
        for word in self.word_ordered_set:
            self.dictionary[word] = self.word_ordered_set.index(word)
        self.dictionary_len = len(self.dictionary)

    def transform(self, raw_documents):
        """Transform documents to a document-term matrix.

        Extract token counts out of raw text documents using the vocabulary
        fitted with fit or the one provided to the constructor.

        Parameters
        ------------
        raw_documents : iterable

        Returns
        --------
        X : 2d array-like sparse matrix, shape (n_samples, n_features)
        """
        text_matrix = np.zeros((len(raw_documents), self.dictionary_len + 1))
        for i, document in enumerate(raw_documents):
            words = word_tokenizer(document)
            for word in words:
                if word in self.dictionary:
                    text_matrix[i, self.dictionary[word]] += 1
                else:
                    text_matrix[i, self.dictionary_len] = 1
        return text_matrix

    def fit_transform(self, raw_documents):
        """Transform documents to a document-term matrix.

        Equivalent to fit followed by transform, but more efficiently
        implemented.

        Parameters
        ------------
        raw_documents : iterable

        Returns
        --------
        X : 2d array-like sparse matrix, shape (n_samples, n_features)
        """
        self.fit(raw_documents)
        return self.transform(raw_documents)
def getBarchartData(self, datum1, datum2):
    # data = json.dumps(datum1)
    df = pd.DataFrame(datum1)
    # df = pd.read_csv('test1.csv')
    # df = pd.read_json(json.loads(sys.argv[1]), orient='index')
    yrs = df['yr'].unique()
    df["MTH"] = pd.Categorical(df['MTH'], [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
    ])
    table = pd.pivot_table(df, index=['yr', 'ITEM_CLASS'], columns=['MTH'],
                           aggfunc=np.sum)
    json_str = table.to_json(orient='split')  # table.to_json('file.json', orient='split')
    # print(json_str)
    chk2 = json.loads(json_str)  # json.load(open('file.json'))
    yrs = OrderedSet([i[0] for i in chk2['index']])
    items = OrderedSet([i[1] for i in chk2['index']])
    metrics = OrderedSet([i[0] for i in chk2['columns']])
    mths = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
    ]
    indices = chk2['index']
    datum = chk2['data']
    cols = chk2['columns']
    # Integer division (the original was Python 2); the result is used as a slice bound.
    cuts = [
        len(datum[0]) // (len(metrics) - metrics.index(i)) for i in metrics
    ]
    # print(cuts)
    productDict = {k: dict() for k in items}
    for i in yrs:
        for j in productDict.values():
            j[i] = []
    for i in metrics:
        for j in yrs:
            for k in items:
                dataitems = [
                    chk2['data'][z]
                    for z in np.where(np.array(chk2['index']) == k)[0]
                ]
                data_for_metric = dataitems[yrs.index(j)][
                    cuts[metrics.index(i)] - 12:cuts[metrics.index(i)]
                ]
                productDict[k][j].append({
                    "name": i,
                    "data": [[mths[f], data_for_metric[f]]
                             for f in range(len(data_for_metric))]
                })
    seriesDict = {k: [] for k in items}
    for i in metrics:
        for j in items:
            dataitems = [
                chk2['data'][z]
                for z in np.where(np.array(chk2['index']) == j)[0]
            ]
            # print(dataitems)
            seriesDict[j].append({
                "name": i,
                "data": [{
                    "name": l,
                    "drilldown": True,
                    "y": round(
                        sum([
                            float(f)
                            for f in dataitems[yrs.index(l)][
                                cuts[metrics.index(i)] - 12:cuts[metrics.index(i)]
                            ]
                            if f is not None
                        ]), 2)
                } for l in yrs]
            })
    return json.dumps(productDict[datum2]), "-", json.dumps(seriesDict[datum2])
def run(self, args=None):
    """Execute the script."""

    # Parse the command line
    params, _ = self.parser.parse_args(args, show_diff_phil=True)

    # Try to load the models and data
    if not params.input.experiments:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if params.input.reflections:
        if len(params.input.reflections) != len(params.input.experiments):
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments"
            )
    reflections, experiments = reflections_and_experiments_from_files(
        params.input.reflections, params.input.experiments
    )
    if reflections:
        reflections = reflections[0]
    else:
        reflections = None

    experiments_template = functools.partial(
        params.output.template.format,
        prefix=params.output.experiments_prefix,
        maxindexlength=len(str(len(experiments) - 1)),
        extension="expt",
    )
    reflections_template = functools.partial(
        params.output.template.format,
        prefix=params.output.reflections_prefix,
        maxindexlength=len(str(len(experiments) - 1)),
        extension="refl",
    )

    if params.output.chunk_sizes:
        if not sum(params.output.chunk_sizes) == len(experiments):
            raise Sorry(
                "Sum of chunk sizes list (%s) not equal to number of experiments (%s)"
                % (sum(params.output.chunk_sizes), len(experiments))
            )

    if params.by_wavelength:
        if reflections:
            if not reflections.experiment_identifiers():
                raise Sorry(
                    "Unable to split by wavelength as no experiment "
                    "identifiers are set in the reflection table."
                )
        if all(experiments.identifiers() == ""):
            raise Sorry(
                "Unable to split by wavelength as no experiment "
                "identifiers are set in the experiment list."
            )
        wavelengths = match_wavelengths(experiments)
        for i, wl in enumerate(sorted(wavelengths.keys())):
            expids = []
            new_exps = ExperimentList()
            exp_nos = wavelengths[wl]
            imageset_ids = []  # record imageset ids to set in refl table
            imagesets_found = OrderedSet()
            for j in exp_nos:
                expids.append(experiments[j].identifier)  # string
                new_exps.append(experiments[j])
                imagesets_found.add(experiments[j].imageset)
                imageset_ids.append(imagesets_found.index(experiments[j].imageset))

            experiment_filename = experiments_template(index=i)
            print(f"Saving experiments with wavelength {wl} to {experiment_filename}")
            new_exps.as_json(experiment_filename)
            if reflections:
                refls = reflections.select_on_experiment_identifiers(expids)
                refls["imageset_id"] = flex.int(refls.size(), 0)
                # now set the imageset ids
                for k, iset_id in enumerate(imageset_ids):
                    # select the experiment based on id (unique per sweep),
                    # and set the imageset_id (not necessarily unique per sweep
                    # if imageset is shared)
                    sel = refls["id"] == k
                    refls["imageset_id"].set_selected(sel, iset_id)
                reflections_filename = reflections_template(index=i)
                print(
                    "Saving reflections with wavelength %s to %s"
                    % (wl, reflections_filename)
                )
                refls.as_file(reflections_filename)

    elif params.by_detector:
        assert (
            not params.output.chunk_size
        ), "chunk_size + by_detector is not implemented"
        if reflections is None:
            split_data = {
                detector: {"experiments": ExperimentList()}
                for detector in experiments.detectors()
            }
        else:
            split_data = {
                detector: {
                    "experiments": ExperimentList(),
                    "reflections": flex.reflection_table(),
                    "imagesets_found": OrderedSet(),
                }
                for detector in experiments.detectors()
            }
        for i, experiment in enumerate(experiments):
            split_expt_id = experiments.detectors().index(experiment.detector)
            experiment_filename = experiments_template(index=split_expt_id)
            print("Adding experiment %d to %s" % (i, experiment_filename))
            split_data[experiment.detector]["experiments"].append(experiment)
            if reflections is not None:
                reflections_filename = reflections_template(index=split_expt_id)
                split_data[experiment.detector]["imagesets_found"].add(
                    experiment.imageset
                )
                print(
                    "Adding reflections for experiment %d to %s"
                    % (i, reflections_filename)
                )
                if reflections.experiment_identifiers().keys():
                    # first find which id value corresponds to experiment in question
                    identifier = experiment.identifier
                    id_ = None
                    for k in reflections.experiment_identifiers().keys():
                        if reflections.experiment_identifiers()[k] == identifier:
                            id_ = k
                            break
                    if id_ is None:
                        raise Sorry(
                            "Unable to find id matching experiment identifier in reflection table."
                        )
                    ref_sel = reflections.select(reflections["id"] == id_)
                    # now reset ids and reset/update identifiers map
                    for k in ref_sel.experiment_identifiers().keys():
                        del ref_sel.experiment_identifiers()[k]
                    new_id = len(split_data[experiment.detector]["experiments"]) - 1
                    ref_sel["id"] = flex.int(len(ref_sel), new_id)
                    ref_sel.experiment_identifiers()[new_id] = identifier
                else:
                    ref_sel = reflections.select(reflections["id"] == i)
                    ref_sel["id"] = flex.int(
                        len(ref_sel),
                        len(split_data[experiment.detector]["experiments"]) - 1,
                    )
                iset_id = split_data[experiment.detector]["imagesets_found"].index(
                    experiment.imageset
                )
                ref_sel["imageset_id"] = flex.int(ref_sel.size(), iset_id)
                split_data[experiment.detector]["reflections"].extend(ref_sel)

        for i, detector in enumerate(experiments.detectors()):
            experiment_filename = experiments_template(index=i)
            print("Saving experiment %d to %s" % (i, experiment_filename))
            split_data[detector]["experiments"].as_json(experiment_filename)

            if reflections is not None:
                reflections_filename = reflections_template(index=i)
                print(
                    "Saving reflections for experiment %d to %s"
                    % (i, reflections_filename)
                )
                split_data[detector]["reflections"].as_file(reflections_filename)

    elif params.output.chunk_size or params.output.chunk_sizes:

        def save_chunk(chunk_id, expts, refls):
            experiment_filename = experiments_template(index=chunk_id)
            print("Saving chunk %d to %s" % (chunk_id, experiment_filename))
            expts.as_json(experiment_filename)
            if refls is not None:
                reflections_filename = reflections_template(index=chunk_id)
                print(
                    "Saving reflections for chunk %d to %s"
                    % (chunk_id, reflections_filename)
                )
                refls.as_file(reflections_filename)

        chunk_counter = 0
        chunk_expts = ExperimentList()
        if reflections:
            chunk_refls = flex.reflection_table()
        else:
            chunk_refls = None
        next_iset_id = 0
        imagesets_found = OrderedSet()
        for i, experiment in enumerate(experiments):
            chunk_expts.append(experiment)
            if reflections:
                if reflections.experiment_identifiers().keys():
                    # first find which id value corresponds to experiment in question
                    identifier = experiment.identifier
                    id_ = None
                    for k in reflections.experiment_identifiers().keys():
                        if reflections.experiment_identifiers()[k] == identifier:
                            id_ = k
                            break
                    if id_ is None:
                        raise Sorry(
                            "Unable to find id matching experiment identifier in reflection table."
                        )
                    ref_sel = reflections.select(reflections["id"] == id_)
                    # now reset ids and reset/update identifiers map
                    for k in ref_sel.experiment_identifiers().keys():
                        del ref_sel.experiment_identifiers()[k]
                    new_id = len(chunk_expts) - 1
                    ref_sel["id"] = flex.int(len(ref_sel), new_id)
                    ref_sel.experiment_identifiers()[new_id] = identifier
                else:
                    ref_sel = reflections.select(reflections["id"] == i)
                    ref_sel["id"] = flex.int(len(ref_sel), len(chunk_expts) - 1)
                if experiment.imageset not in imagesets_found:
                    imagesets_found.add(experiment.imageset)
                    ref_sel["imageset_id"] = flex.int(ref_sel.size(), next_iset_id)
                    next_iset_id += 1
                else:
                    iset_id = imagesets_found.index(experiment.imageset)
                    ref_sel["imageset_id"] = flex.int(ref_sel.size(), iset_id)
                chunk_refls.extend(ref_sel)
            if params.output.chunk_sizes:
                chunk_limit = params.output.chunk_sizes[chunk_counter]
            else:
                chunk_limit = params.output.chunk_size
            if len(chunk_expts) == chunk_limit:
                save_chunk(chunk_counter, chunk_expts, chunk_refls)
                chunk_counter += 1
                chunk_expts = ExperimentList()
                if reflections:
                    chunk_refls = flex.reflection_table()
                else:
                    chunk_refls = None
        if len(chunk_expts) > 0:
            save_chunk(chunk_counter, chunk_expts, chunk_refls)

    else:
        for i, experiment in enumerate(experiments):
            experiment_filename = experiments_template(index=i)
            print("Saving experiment %d to %s" % (i, experiment_filename))
            ExperimentList([experiment]).as_json(experiment_filename)

            if reflections is not None:
                reflections_filename = reflections_template(index=i)
                print(
                    "Saving reflections for experiment %d to %s"
                    % (i, reflections_filename)
                )
                ref_sel = reflections.select(reflections["id"] == i)
                if ref_sel.experiment_identifiers().keys():
                    identifier = ref_sel.experiment_identifiers()[i]
                    for k in ref_sel.experiment_identifiers().keys():
                        del ref_sel.experiment_identifiers()[k]
                    ref_sel["id"] = flex.int(ref_sel.size(), 0)
                    ref_sel.experiment_identifiers()[0] = identifier
                else:
                    ref_sel["id"] = flex.int(len(ref_sel), 0)
                ref_sel["imageset_id"] = flex.int(len(ref_sel), 0)
                ref_sel.as_file(reflections_filename)
    return