Code Example #1
def vertices_and_indices():

    icosahedron = mesh.Mesh.from_file(output('subdivided-5.stl'))

    vertices = OrderedSet()
    
    for face in icosahedron:

        v1 = face[0:3]
        v2 = face[3:6]
        v3 = face[6:9]

        vertices.add(tuple(v1))
        vertices.add(tuple(v2))
        vertices.add(tuple(v3))

    with open(output("vertices"), 'w') as vertices_output:

        for vertex in vertices: 

            theta, phi = spherical(vertex)
            vertices_output.write(f"{theta}, {phi},\n")

    with open(output("indices"), 'w') as indices_output:

        for face in icosahedron:

            v1 = vertices.index(tuple(face[0:3]))
            v2 = vertices.index(tuple(face[3:6]))
            v3 = vertices.index(tuple(face[6:9]))

            indices_output.write(f"{v1}, {v2}, {v3},\n")

    return vertices
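This example leans on two OrderedSet operations: add() stores each vertex tuple once, in insertion order, and index() later returns the stable integer position of a vertex, which is exactly what an index buffer needs. A minimal, self-contained sketch of that idiom, assuming the OrderedSet is the one from the ordered-set package (from ordered_set import OrderedSet) and using made-up coordinates:

from ordered_set import OrderedSet

# two triangles sharing an edge (hypothetical coordinates)
faces = [((0.0, 0.0, 1.0), (1.0, 0.0, 0.0), (0.0, 1.0, 0.0)),
         ((0.0, 0.0, 1.0), (0.0, 1.0, 0.0), (-1.0, 0.0, 0.0))]

vertices = OrderedSet()
for v1, v2, v3 in faces:
    vertices.add(v1)   # duplicates are ignored; insertion order is preserved
    vertices.add(v2)
    vertices.add(v3)

indices = [tuple(vertices.index(v) for v in face) for face in faces]
print(len(vertices))  # 4 unique vertices
print(indices)        # [(0, 1, 2), (0, 2, 3)]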
Code Example #2
File: statistics.py Project: hamidgh09/npf
    def buildDataset(cls, all_results: Dataset, testie: Testie) -> List[tuple]:
        dtype = testie.variables.dtype()
        y = OrderedDict()
        dataset = []
        for i, (run, results_types) in enumerate(all_results.items()):
            vars = list(run.variables.values())
            if results_types is not None and len(results_types) > 0:
                dataset.append(vars)
                for result_type, results in results_types.items():
                    r = np.mean(results)
                    y.setdefault(result_type, []).append(r)

        dtype['values'] = [None] * len(dtype['formats'])
        for i, f in enumerate(dtype['formats']):
            if f is str:
                dtype['formats'][i] = int
                values = OrderedSet()
                for row in dataset:
                    values.add(row[i])
                    row[i] = values.index(row[i])
                dtype['values'][i] = list(values)
        X = np.array(dataset, ndmin=2)

        lset = []
        for result_type, v in y.items():
            lset.append((result_type, X, np.array(v), dtype))
        return lset
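The inner loop over dtype['formats'] converts a string-valued column into integer codes so the rows can be packed into a numeric NumPy array; the OrderedSet both deduplicates the strings and remembers which code each one received (list(values) is kept in dtype['values'] for decoding). A stripped-down sketch of that encoding idiom, with hypothetical row data and again assuming the ordered-set package:

from ordered_set import OrderedSet

rows = [["click", 64.0], ["fastclick", 128.0], ["click", 256.0]]  # hypothetical (variable, result) rows

values = OrderedSet()
for row in rows:
    values.add(row[0])             # register each string once, in first-seen order
    row[0] = values.index(row[0])  # replace the string by its stable integer code

print(rows)          # [[0, 64.0], [1, 128.0], [0, 256.0]]
print(list(values))  # ['click', 'fastclick'] -- kept so the codes can be decoded later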
Code Example #3
File: samprocessor.py Project: bm2-lab/cage
def __Dedupe(items, key=None):
    seen = OrderedSet()
    num_seen = list()
    gn_item = iter(items)
    while True:
        try:
            item = next(gn_item)
        except StopIteration:
            # input exhausted: emit the per-item duplicate counts, then stop
            yield (None, num_seen)
            break
        else:
            val = item if key is None else key(item)
            if val not in seen:
                yield (item, None)
                seen.add(val)
                num_seen.append(1)
            else:
                num_seen[seen.index(val)] += 1
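For reference, the generator yields (item, None) for each first occurrence and, once the input is exhausted, a final (None, num_seen) where num_seen[i] counts how often the i-th unique key appeared. A hedged usage sketch, assuming the helper is defined at module level (inside a class the double-underscore name would be mangled):

for item, counts in __Dedupe(["a", "b", "a", "c", "b", "a"]):
    if item is not None:
        print("first occurrence:", item)      # prints a, b, c in order
    else:
        print("occurrence counts:", counts)   # prints [3, 2, 1]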
Code Example #4
def vectorize(sentence):

    # set of unique words in the whole document.
    unique_words = OrderedSet()

    for sent in sentence:
        for word in sent:

            unique_words.add(word)

    unique_words = list(
        unique_words
    )  # convert the set to a list to make it easier to work with.

    #print(unique_words, len(unique_words))

    # a list of lists that contains the vectorized form of each sentence in the document.
    vector = list()

    # in the vectorized representation, we consider the bag of words (unique words in the text).
    # then, we count the occurrence of each word in a sentence and represent it in a vector whose length equals len(unique_words)
    # ex: sent1 = "i am a boy"
    #     sent2 = "i am a girl"
    # unique_words = ["i", "am", "a", "boy", "girl"]
    # vector representation of sent1 = [1, 1, 1, 1, 0]
    # vector representation of sent2 = [1, 1, 1, 0, 1]

    for sent in sentence:  # iterate over every sentence in the document
        temp_vector = [0] * len(
            unique_words
        )  # create a temporary vector to count the occurrence of each word in that sentence.

        for word in sent:  # iterate over every word in the sentence.

            temp_vector[unique_words.index(word)] += 1

        vector.append(
            temp_vector
        )  # add the temporary vector to the list of vectors for each sentence (list of lists)

    #print(vector)

    return vector, unique_words
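Calling the function on the two tokenised sentences from the comments reproduces the vectors shown there (assuming vectorize and the OrderedSet import are available in the same script):

doc = [["i", "am", "a", "boy"], ["i", "am", "a", "girl"]]
vector, unique_words = vectorize(doc)
print(unique_words)  # ['i', 'am', 'a', 'boy', 'girl']
print(vector)        # [[1, 1, 1, 1, 0], [1, 1, 1, 0, 1]]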
Code Example #5
File: statistics.py Project: SDarayan/npf
    def buildDataset(cls, all_results: Dataset, testie: Testie):
        dtype = testie.variables.dtype()
        y = []
        dataset = []
        for i, (run, results) in enumerate(all_results.items()):
            vars = list(run.variables.values())
            if results is not None:
                dataset.append(vars)
                y.append(np.mean(results))
        dtype['formats'] = dtype['formats']
        dtype['names'] = dtype['names']

        for i, f in enumerate(dtype['formats']):
            if f is str:
                dtype['formats'][i] = int
                values = OrderedSet()
                for row in dataset:
                    values.add(row[i])
                    row[i] = values.index(row[i])
        X = np.array(dataset, ndmin=2)
        return X, np.array(y, dtype=[('result', float)])
Code Example #6
def update_imageset_ids(experiments, reflections):
    """For a list of input experiments and reflections (each containing one
    sweep), update or add the imageset_id column to the data to match the order
    in the experiment list.

    This means that when the reflection tables are combined, the data is correct.
    """
    # input a list of ordered matching experiments and reflection tables.

    next_iset_id = 0
    imagesets_found = OrderedSet()
    for expt, table in zip(experiments, reflections):
        if "imageset_id" in table:
            assert len(set(table["imageset_id"])) == 1
        iset = expt.imageset
        if iset not in imagesets_found:
            imagesets_found.add(iset)
            table["imageset_id"] = flex.int(table.size(), next_iset_id)
            next_iset_id += 1
        else:
            iset_id = imagesets_found.index(iset)
            table["imageset_id"] = flex.int(table.size(), iset_id)
    return reflections
Code Example #7
    orient='split')  #table.to_json('file.json',orient='split')
#print json_str
chk2 = json.loads(json_str)  #json.load(open('file.json'))

yrs = OrderedSet([i[0] for i in chk2['index']])
items = OrderedSet([i[1] for i in chk2['index']])
metrics = OrderedSet([i[0] for i in chk2['columns']])
mths = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
    'Nov', 'Dec'
]

indices = chk2['index']
datum = chk2['data']
cols = chk2['columns']
cuts = [len(datum[0]) // (len(metrics) - metrics.index(i)) for i in metrics]
#print cuts

productDict = {k: dict() for k in items}

for i in yrs:
    for j in productDict.values():
        j[i] = []

for i in metrics:
    for j in yrs:
        for k in items:
            dataitems = [
                chk2['data'][z]
                for z in np.where(np.array(chk2['index']) == k)[0]
            ]
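Here OrderedSet is built directly from a comprehension, which deduplicates the pivot-table index tuples while preserving first-seen order, so yrs.index() and metrics.index() can later act as positional offsets into the flattened data. A small illustration with hypothetical index pairs, assuming the ordered-set package:

from ordered_set import OrderedSet

index = [[2019, "A"], [2019, "B"], [2020, "A"], [2020, "B"]]  # hypothetical (yr, item) pairs
yrs = OrderedSet([i[0] for i in index])
items = OrderedSet([i[1] for i in index])
print(list(yrs), yrs.index(2020))     # [2019, 2020] 1
print(list(items), items.index("B"))  # ['A', 'B'] 1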
Code Example #8
def run_integration(params, experiments, reference=None):
    """Perform the integration.

    Returns:
        experiments: The integrated experiments
        reflections: The integrated reflections
        report(optional): An integration report.

    Raises:
        ValueError: For a number of bad inputs
        RuntimeError: If the profile model creation fails
    """
    predicted = None
    rubbish = None

    for abs_params in params.absorption_correction:
        if abs_params.apply:
            if not (
                params.integration.debug.output
                and not params.integration.debug.separate_files
            ):
                raise ValueError(
                    "Shoeboxes must be saved to integration intermediates to apply an absorption correction. "
                    + "Set integration.debug.output=True, integration.debug.separate_files=False and "
                    + "integration.debug.delete_shoeboxes=True to temporarily store shoeboxes."
                )

    # Print if we're using a mask
    for i, exp in enumerate(experiments):
        mask = exp.imageset.external_lookup.mask
        if mask.filename is not None:
            if mask.data:
                logger.info("Using external mask: %s", mask.filename)
                for tile in mask.data:
                    logger.info(" Mask has %d pixels masked", tile.data().count(False))

    # Print the experimental models
    for i, exp in enumerate(experiments):
        summary = "\n".join(
            (
                "",
                "=" * 80,
                "",
                "Experiments",
                "",
                "Models for experiment %d" % i,
                "",
                str(exp.beam),
                str(exp.detector),
            )
        )
        if exp.goniometer:
            summary += str(exp.goniometer) + "\n"
        if exp.scan:
            summary += str(exp.scan) + "\n"
        summary += str(exp.crystal)
        logger.info(summary)

    logger.info("\n".join(("", "=" * 80, "")))
    logger.info(heading("Initialising"))

    # Load the data
    if reference:
        reference, rubbish = process_reference(reference)

        # Check pixels don't belong to neighbours
        if exp.goniometer is not None and exp.scan is not None:
            reference = filter_reference_pixels(reference, experiments)

        # Modify experiment list if scan range is set.
        experiments, reference = split_for_scan_range(
            experiments, reference, params.scan_range
        )

    # Modify experiment list if exclude images is set
    if params.exclude_images:
        for experiment in experiments:
            for index in params.exclude_images:
                experiment.imageset.mark_for_rejection(index, True)

    # Predict the reflections
    logger.info("\n".join(("", "=" * 80, "")))
    logger.info(heading("Predicting reflections"))
    predicted = flex.reflection_table.from_predictions_multi(
        experiments,
        dmin=params.prediction.d_min,
        dmax=params.prediction.d_max,
        margin=params.prediction.margin,
        force_static=params.prediction.force_static,
        padding=params.prediction.padding,
    )
    isets = OrderedSet(e.imageset for e in experiments)
    predicted["imageset_id"] = flex.int(predicted.size(), 0)
    if len(isets) > 1:
        for e in experiments:
            iset_id = isets.index(e.imageset)
            for id_ in predicted.experiment_identifiers().keys():
                identifier = predicted.experiment_identifiers()[id_]
                if identifier == e.identifier:
                    sel = predicted["id"] == id_
                    predicted["imageset_id"].set_selected(sel, iset_id)
                    break

    # Match reference with predicted
    if reference:
        matched, reference, unmatched = predicted.match_with_reference(reference)
        assert len(matched) == len(predicted)
        assert matched.count(True) <= len(reference)
        if matched.count(True) == 0:
            raise ValueError(
                """
        Invalid input for reference reflections.
        Zero reference spots were matched to predictions
    """
            )
        elif unmatched:
            msg = (
                "Warning: %d reference spots were not matched to predictions"
                % unmatched.size()
            )
            border = "\n".join(("", "*" * 80, ""))
            logger.info("".join((border, msg, border)))
            rubbish.extend(unmatched)

        if len(experiments) > 1:
            # filter out any experiments without matched reference reflections
            # f_: filtered

            f_reference = flex.reflection_table()
            f_predicted = flex.reflection_table()
            f_rubbish = flex.reflection_table()
            f_experiments = ExperimentList()
            good_expt_count = 0

            def refl_extend(src, dest, eid):
                old_id = eid
                new_id = good_expt_count
                tmp = src.select(src["id"] == old_id)
                tmp["id"] = flex.int(len(tmp), good_expt_count)
                if old_id in tmp.experiment_identifiers():
                    identifier = tmp.experiment_identifiers()[old_id]
                    del tmp.experiment_identifiers()[old_id]
                    tmp.experiment_identifiers()[new_id] = identifier
                dest.extend(tmp)

            for expt_id, experiment in enumerate(experiments):
                if len(reference.select(reference["id"] == expt_id)) != 0:
                    refl_extend(reference, f_reference, expt_id)
                    refl_extend(predicted, f_predicted, expt_id)
                    refl_extend(rubbish, f_rubbish, expt_id)
                    f_experiments.append(experiment)
                    good_expt_count += 1
                else:
                    logger.info(
                        "Removing experiment %d: no reference reflections matched to predictions",
                        expt_id,
                    )

            reference = f_reference
            predicted = f_predicted
            experiments = f_experiments
            rubbish = f_rubbish

    # Select a random sample of the predicted reflections
    if not params.sampling.integrate_all_reflections:
        predicted = sample_predictions(experiments, predicted, params)

    # Compute the profile model - either load existing or compute
    # can raise RuntimeError
    experiments = ProfileModelFactory.create(params, experiments, reference)
    for expr in experiments:
        if expr.profile is None:
            raise ValueError("No profile information in experiment list")
    del reference

    # Compute the bounding box
    predicted.compute_bbox(experiments)

    # Create the integrator
    integrator = create_integrator(params, experiments, predicted)

    # Integrate the reflections
    reflections = integrator.integrate()

    # Remove unintegrated reflections
    if not params.output.output_unintegrated_reflections:
        keep = reflections.get_flags(reflections.flags.integrated, all=False)
        logger.info(
            "Removing %d unintegrated reflections of %d total",
            keep.count(False),
            keep.size(),
        )

        reflections = reflections.select(keep)

    # Append rubbish data onto the end
    if rubbish is not None and params.output.include_bad_reference:
        mask = flex.bool(len(rubbish), True)
        rubbish.unset_flags(mask, rubbish.flags.integrated_sum)
        rubbish.unset_flags(mask, rubbish.flags.integrated_prf)
        rubbish.set_flags(mask, rubbish.flags.bad_reference)
        reflections.extend(rubbish)

    # Correct integrated intensities for absorption correction, if necessary
    for abs_params in params.absorption_correction:
        if abs_params.apply and abs_params.algorithm == "fuller_kapton":
            from dials.algorithms.integration.kapton_correction import (
                multi_kapton_correction,
            )

            experiments, reflections = multi_kapton_correction(
                experiments, reflections, abs_params.fuller_kapton, logger=logger
            )()

    if params.significance_filter.enable:
        from dials.algorithms.integration.stills_significance_filter import (
            SignificanceFilter,
        )

        sig_filter = SignificanceFilter(params)
        filtered_refls = sig_filter(experiments, reflections)
        accepted_expts = ExperimentList()
        accepted_refls = flex.reflection_table()
        logger.info(
            "Removed %d reflections out of %d when applying significance filter",
            (reflections.size() - filtered_refls.size()),
            reflections.size(),
        )
        for expt_id, expt in enumerate(experiments):
            refls = filtered_refls.select(filtered_refls["id"] == expt_id)
            if refls:
                accepted_expts.append(expt)
                current_id = expt_id
                new_id = len(accepted_expts) - 1
                refls["id"] = flex.int(len(refls), new_id)
                if expt.identifier:
                    del refls.experiment_identifiers()[current_id]
                    refls.experiment_identifiers()[new_id] = expt.identifier
                accepted_refls.extend(refls)
            else:
                logger.info(
                    "Removed experiment %d which has no reflections left after applying significance filter",
                    expt_id,
                )

        if not accepted_refls:
            raise ValueError("No reflections left after applying significance filter")
        experiments = accepted_expts
        reflections = accepted_refls

    # Write a report if requested
    report = None
    if params.output.report is not None:
        report = integrator.report()

    return experiments, reflections, report
Code Example #9
File: train_mushroom.py Project: dkvtieu/kokoro
# Acquire new column names
new_cols = OrderedSet()
for col in df:
    for val in df[col]:
        title = col + "_" + val
        new_cols.add(title)

all_new_rows = []

for idx, row in df.iterrows():
    new_row = [0] * len(new_cols)
    row_dict = row.to_dict()

    for key in row_dict:
        val = key + "_" + row_dict[key]
        match_idx = new_cols.index(val)
        new_row[match_idx] = 1

    all_new_rows.append(new_row)

data = np.array(all_new_rows)
new_df = pd.DataFrame(data=data, columns=new_cols)

input_dataset = np.matrix(new_df.loc[:, 'cap-shape_x':])
output_dataset = np.matrix(new_df.loc[:, 'class_p':'class_p'])

input_dataset = input_dataset[:][:1000]
output_dataset = output_dataset[:][:1000]

normed_input = (input_dataset - input_dataset.mean()) / input_dataset.std()
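The two loops above perform a manual one-hot encoding: one output column per original column/value pair, with OrderedSet.index() locating the column to switch on. A toy-sized sketch of the same pattern, using a hypothetical two-row frame in place of the mushroom data:

import numpy as np
import pandas as pd
from ordered_set import OrderedSet

# hypothetical two-row frame standing in for the mushroom dataset
df = pd.DataFrame({"class": ["p", "e"], "cap-shape": ["x", "b"]})

new_cols = OrderedSet()
for col in df:
    for val in df[col]:
        new_cols.add(col + "_" + val)

rows = []
for _, row in df.iterrows():
    one_hot = [0] * len(new_cols)
    for col, val in row.to_dict().items():
        one_hot[new_cols.index(col + "_" + val)] = 1
    rows.append(one_hot)

print(pd.DataFrame(np.array(rows), columns=list(new_cols)))
#    class_p  class_e  cap-shape_x  cap-shape_b
# 0        1        0            1            0
# 1        0        1            0            1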
Code Example #10
File: text.py Project: wwyf/ai_middle_project
class TfidfVectorizer():
    def __init__(self):
        self.word_ordered_set = OrderedSet()
        self.dictionary = {}
        self.dictionary_len = len(self.dictionary)
        pass
    
    def fit(self, raw_documents):
        """

        Build (or extend) the dictionary held by this class.

        Parameters
        ------------
        raw_documents : iterable

        """
        for text in raw_documents:
            words = word_tokenizer(text)
            for word in words:
                self.word_ordered_set.add(word)
        for word in self.word_ordered_set:
            self.dictionary[word] = self.word_ordered_set.index(word)
        self.dictionary_len = len(self.dictionary)
    

    def fit_transform(self, raw_documents):
        self.fit(raw_documents)
        return self.transform(raw_documents)

    def transform(self, raw_documents):
    # def get_TD_IDE_mat(raw_documents):
        """
        Parameters
        ------------
        raw_documents : iterable of string

        Returns
        -----------
        df_idf_mat : TF-IDF matrix; the number of rows is the number of documents, the number of columns is the number of unique words
        """
        # Compute the matrix dimensions
        row_d = len(raw_documents)
        column_d = self.dictionary_len+1
        # print(row_d, column_d)
        # Initialize the matrices
        num_mat = np.zeros((row_d, column_d))
        df_mat = np.zeros((row_d, column_d))
        idf_mat = np.zeros((1, column_d))
        # Fill num_mat row by row: num_mat[i][j] is the number of times word j occurs in document i
        for row_index, row in enumerate(raw_documents):
            words = word_tokenizer(row)
            for word in words:
                if word in self.dictionary:
                    num_mat[row_index][self.dictionary[word]] += 1
                else:
                    num_mat[row_index][self.dictionary_len] = 1
        # Compute the df matrix (per-row term frequencies)
        df_mat = num_mat / num_mat.sum(axis=1).reshape(row_d, 1)
        # Compute the idf values
        count_mat = num_mat
        count_mat[count_mat != 0] = 1
        idf_mat = np.log(row_d / (count_mat.sum(axis=0) + 1).reshape(1, column_d))
        # df_mat and idf_mat are now available
        df_idf_mat = df_mat * idf_mat
        return df_idf_mat
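A hedged usage sketch: word_tokenizer is defined elsewhere in the project's text.py, so a plain whitespace split stands in for it here, and the class above is assumed to be pasted into the same script:

word_tokenizer = str.split  # assumption: stand-in for the project's own tokenizer

vec = TfidfVectorizer()
mat = vec.fit_transform(["the cat sat", "the dog sat down"])
print(mat.shape)  # (2, 6): five distinct words plus one extra column reserved for unseen words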
Code Example #11
File: text.py Project: wwyf/ai_middle_project
class CountVectorizer():
    """
    Parameters
    -----------


    """
    def __init__(self):
        # This lookup table is for synonyms: strings with the same meaning can be mapped to a single canonical string
        self.lookup_table = {}
        # TODO: to be completed
        self.comfused_words_set = set()
        self.not_comfused_words_set = set()
        self.word_ordered_set = OrderedSet()
        self.dictionary = {}
        self.dictionary_len = len(self.dictionary)
        pass



    
    def fit(self, raw_documents):
        """

        Build (or extend) the dictionary held by this class.

        Parameters
        ------------
        raw_documents : iterable

        """
        for text in raw_documents:
            words = word_tokenizer(text)
            for word in words:
                self.word_ordered_set.add(word)
        for word in self.word_ordered_set:
            self.dictionary[word] = self.word_ordered_set.index(word)
        self.dictionary_len = len(self.dictionary)
    

    def transform(self, raw_documents):
        """
        Transform documents to document-term matrix.

        Extract token counts out of raw text documents using the vocabulary fitted with fit or the one provided to the constructor.

        Parameters
        ------------
        raw_documents : iterable

        Returns
        --------
        X : 2d numpy array, shape (n_samples, n_features)
        """
        text_matrix = np.zeros((len(raw_documents), self.dictionary_len+1))
        for i, document in enumerate(raw_documents):
            words = word_tokenizer(document)
            for word in words:
                if word in self.dictionary:
                    text_matrix[i, self.dictionary[word]] += 1
                else:
                    text_matrix[i, self.dictionary_len] = 1
        return text_matrix
    
    def fit_transform(self, raw_documents):
        """
        Transform documents to document-term matrix.
        This is equivalent to fit followed by transform, but more efficiently implemented.
        Parameters
        ------------
        raw_documents : iterable

        Returns
        --------
        X : 2d numpy array, shape (n_samples, n_features)
        """
        self.fit(raw_documents)
        return self.transform(raw_documents)
Code Example #12
    def getBarchartData(self, datum1, datum2):
        # data = json.dumps(datum1)
        df = pd.DataFrame(datum1)
        #df = pd.read_csv('test1.csv')
        #df = pd.read_json(json.loads(sys.argv[1]), orient='index')

        yrs = df['yr'].unique()
        df["MTH"] = pd.Categorical(df['MTH'], [
            'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
            'Oct', 'Nov', 'Dec'
        ])
        table = pd.pivot_table(df,
                               index=['yr', 'ITEM_CLASS'],
                               columns=['MTH'],
                               aggfunc=np.sum)
        json_str = table.to_json(
            orient='split')  #table.to_json('file.json',orient='split')
        #print json_str
        chk2 = json.loads(json_str)  #json.load(open('file.json'))

        yrs = OrderedSet([i[0] for i in chk2['index']])
        items = OrderedSet([i[1] for i in chk2['index']])
        metrics = OrderedSet([i[0] for i in chk2['columns']])
        mths = [
            'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
            'Oct', 'Nov', 'Dec'
        ]

        indices = chk2['index']
        datum = chk2['data']
        cols = chk2['columns']
        cuts = [
            len(datum[0]) // (len(metrics) - metrics.index(i)) for i in metrics
        ]
        #print cuts

        productDict = {k: dict() for k in items}

        for i in yrs:
            for j in productDict.values():
                j[i] = []

        for i in metrics:
            for j in yrs:
                for k in items:
                    dataitems = [
                        chk2['data'][z]
                        for z in np.where(np.array(chk2['index']) == k)[0]
                    ]
                    data_for_metric = dataitems[yrs.index(
                        j)][cuts[metrics.index(i)] - 12:cuts[metrics.index(i)]]
                    productDict[k][j].append({
                        "name":
                        i,
                        "data": [[mths[f], data_for_metric[f]]
                                 for f in range(len(data_for_metric))]
                    })
        seriesDict = {k: [] for k in items}
        # print('did it get here?')
        for i in metrics:
            for j in items:
                dataitems = [
                    chk2['data'][z]
                    for z in np.where(np.array(chk2['index']) == j)[0]
                ]
                #print dataitems
                seriesDict[j].append({
                    "name":
                    i,
                    "data": [{
                        "name":
                        l,
                        "drilldown":
                        True,
                        "y":
                        round(
                            sum([
                                float(f) for f in dataitems[yrs.index(l)]
                                [cuts[metrics.index(i)] -
                                 12:cuts[metrics.index(i)]] if f is not None
                            ]), 2)
                    } for l in yrs]
                })
        return json.dumps(productDict[datum2]), "-", json.dumps(
            seriesDict[datum2])
Code Example #13
File: split_experiments.py Project: kmdalton/dials
    def run(self, args=None):
        """Execute the script."""

        # Parse the command line
        params, _ = self.parser.parse_args(args, show_diff_phil=True)

        # Try to load the models and data
        if not params.input.experiments:
            print("No Experiments found in the input")
            self.parser.print_help()
            return
        if params.input.reflections:
            if len(params.input.reflections) != len(params.input.experiments):
                raise Sorry(
                    "The number of input reflections files does not match the "
                    "number of input experiments")
        reflections, experiments = reflections_and_experiments_from_files(
            params.input.reflections, params.input.experiments)
        if reflections:
            reflections = reflections[0]
        else:
            reflections = None

        experiments_template = functools.partial(
            params.output.template.format,
            prefix=params.output.experiments_prefix,
            maxindexlength=len(str(len(experiments) - 1)),
            extension="expt",
        )

        reflections_template = functools.partial(
            params.output.template.format,
            prefix=params.output.reflections_prefix,
            maxindexlength=len(str(len(experiments) - 1)),
            extension="refl",
        )

        if params.output.chunk_sizes:
            if not sum(params.output.chunk_sizes) == len(experiments):
                raise Sorry(
                    "Sum of chunk sizes list (%s) not equal to number of experiments (%s)"
                    % (sum(params.output.chunk_sizes), len(experiments)))

        if params.by_wavelength:
            if reflections:
                if not reflections.experiment_identifiers():
                    raise Sorry(
                        "Unable to split by wavelength as no experiment "
                        "identifiers are set in the reflection table.")
            if all(experiments.identifiers() == ""):
                raise Sorry("Unable to split by wavelength as no experiment "
                            "identifiers are set in the experiment list.")

            wavelengths = match_wavelengths(experiments)
            for i, wl in enumerate(sorted(wavelengths.keys())):
                expids = []
                new_exps = ExperimentList()
                exp_nos = wavelengths[wl]
                imageset_ids = []  # record imageset ids to set in refl table
                imagesets_found = OrderedSet()
                for j in exp_nos:
                    expids.append(experiments[j].identifier)  # string
                    new_exps.append(experiments[j])
                    imagesets_found.add(experiments[j].imageset)
                    imageset_ids.append(
                        imagesets_found.index(experiments[j].imageset))
                experiment_filename = experiments_template(index=i)
                print(
                    f"Saving experiments with wavelength {wl} to {experiment_filename}"
                )
                new_exps.as_json(experiment_filename)
                if reflections:
                    refls = reflections.select_on_experiment_identifiers(
                        expids)
                    refls["imageset_id"] = flex.int(refls.size(), 0)
                    # now set the imageset ids
                    for k, iset_id in enumerate(imageset_ids):
                        # select the experiment based on id (unique per sweep),
                        # and set the imageset_id (not necessarily unique per sweep
                        # if imageset is shared)
                        sel = refls["id"] == k
                        refls["imageset_id"].set_selected(sel, iset_id)
                    reflections_filename = reflections_template(index=i)
                    print("Saving reflections with wavelength %s to %s" %
                          (wl, reflections_filename))
                    refls.as_file(reflections_filename)

        elif params.by_detector:
            assert (not params.output.chunk_size
                    ), "chunk_size + by_detector is not implemented"
            if reflections is None:
                split_data = {
                    detector: {
                        "experiments": ExperimentList()
                    }
                    for detector in experiments.detectors()
                }
            else:
                split_data = {
                    detector: {
                        "experiments": ExperimentList(),
                        "reflections": flex.reflection_table(),
                        "imagesets_found": OrderedSet(),
                    }
                    for detector in experiments.detectors()
                }
            for i, experiment in enumerate(experiments):
                split_expt_id = experiments.detectors().index(
                    experiment.detector)
                experiment_filename = experiments_template(index=split_expt_id)
                print("Adding experiment %d to %s" % (i, experiment_filename))
                split_data[experiment.detector]["experiments"].append(
                    experiment)
                if reflections is not None:
                    reflections_filename = reflections_template(
                        index=split_expt_id)
                    split_data[experiment.detector]["imagesets_found"].add(
                        experiment.imageset)
                    print("Adding reflections for experiment %d to %s" %
                          (i, reflections_filename))
                    if reflections.experiment_identifiers().keys():
                        # first find which id value corresponds to experiment in question
                        identifier = experiment.identifier
                        id_ = None
                        for k in reflections.experiment_identifiers().keys():
                            if reflections.experiment_identifiers(
                            )[k] == identifier:
                                id_ = k
                                break
                        if id_ is None:
                            raise Sorry(
                                "Unable to find id matching experiment identifier in reflection table."
                            )
                        ref_sel = reflections.select(reflections["id"] == id_)
                        # now reset ids and reset/update identifiers map
                        for k in ref_sel.experiment_identifiers().keys():
                            del ref_sel.experiment_identifiers()[k]
                        new_id = len(
                            split_data[experiment.detector]["experiments"]) - 1
                        ref_sel["id"] = flex.int(len(ref_sel), new_id)
                        ref_sel.experiment_identifiers()[new_id] = identifier
                    else:
                        ref_sel = reflections.select(reflections["id"] == i)
                        ref_sel["id"] = flex.int(
                            len(ref_sel),
                            len(split_data[experiment.detector]["experiments"])
                            - 1,
                        )
                    iset_id = split_data[
                        experiment.detector]["imagesets_found"].index(
                            experiment.imageset)
                    ref_sel["imageset_id"] = flex.int(ref_sel.size(), iset_id)
                    split_data[experiment.detector]["reflections"].extend(
                        ref_sel)

            for i, detector in enumerate(experiments.detectors()):
                experiment_filename = experiments_template(index=i)
                print("Saving experiment %d to %s" % (i, experiment_filename))
                split_data[detector]["experiments"].as_json(
                    experiment_filename)

                if reflections is not None:
                    reflections_filename = reflections_template(index=i)
                    print("Saving reflections for experiment %d to %s" %
                          (i, reflections_filename))
                    split_data[detector]["reflections"].as_file(
                        reflections_filename)
        elif params.output.chunk_size or params.output.chunk_sizes:

            def save_chunk(chunk_id, expts, refls):
                experiment_filename = experiments_template(index=chunk_id)
                print("Saving chunk %d to %s" %
                      (chunk_id, experiment_filename))
                expts.as_json(experiment_filename)
                if refls is not None:
                    reflections_filename = reflections_template(index=chunk_id)
                    print("Saving reflections for chunk %d to %s" %
                          (chunk_id, reflections_filename))
                    refls.as_file(reflections_filename)

            chunk_counter = 0
            chunk_expts = ExperimentList()
            if reflections:
                chunk_refls = flex.reflection_table()
            else:
                chunk_refls = None
            next_iset_id = 0
            imagesets_found = OrderedSet()
            for i, experiment in enumerate(experiments):
                chunk_expts.append(experiment)
                if reflections:
                    if reflections.experiment_identifiers().keys():
                        # first find which id value corresponds to experiment in question
                        identifier = experiment.identifier
                        id_ = None
                        for k in reflections.experiment_identifiers().keys():
                            if reflections.experiment_identifiers(
                            )[k] == identifier:
                                id_ = k
                                break
                        if id_ is None:
                            raise Sorry(
                                "Unable to find id matching experiment identifier in reflection table."
                            )
                        ref_sel = reflections.select(reflections["id"] == id_)
                        # now reset ids and reset/update identifiers map
                        for k in ref_sel.experiment_identifiers().keys():
                            del ref_sel.experiment_identifiers()[k]
                        new_id = len(chunk_expts) - 1
                        ref_sel["id"] = flex.int(len(ref_sel), new_id)
                        ref_sel.experiment_identifiers()[new_id] = identifier
                    else:
                        ref_sel = reflections.select(reflections["id"] == i)
                        ref_sel["id"] = flex.int(len(ref_sel),
                                                 len(chunk_expts) - 1)
                    if experiment.imageset not in imagesets_found:
                        imagesets_found.add(experiment.imageset)
                        ref_sel["imageset_id"] = flex.int(
                            ref_sel.size(), next_iset_id)
                        next_iset_id += 1
                    else:
                        iset_id = imagesets_found.index(experiment.imageset)
                        ref_sel["imageset_id"] = flex.int(
                            ref_sel.size(), iset_id)
                    chunk_refls.extend(ref_sel)
                if params.output.chunk_sizes:
                    chunk_limit = params.output.chunk_sizes[chunk_counter]
                else:
                    chunk_limit = params.output.chunk_size
                if len(chunk_expts) == chunk_limit:
                    save_chunk(chunk_counter, chunk_expts, chunk_refls)
                    chunk_counter += 1
                    chunk_expts = ExperimentList()
                    if reflections:
                        chunk_refls = flex.reflection_table()
                    else:
                        chunk_refls = None
            if len(chunk_expts) > 0:
                save_chunk(chunk_counter, chunk_expts, chunk_refls)
        else:
            for i, experiment in enumerate(experiments):

                experiment_filename = experiments_template(index=i)
                print("Saving experiment %d to %s" % (i, experiment_filename))
                ExperimentList([experiment]).as_json(experiment_filename)

                if reflections is not None:
                    reflections_filename = reflections_template(index=i)
                    print("Saving reflections for experiment %d to %s" %
                          (i, reflections_filename))
                    ref_sel = reflections.select(reflections["id"] == i)
                    if ref_sel.experiment_identifiers().keys():
                        identifier = ref_sel.experiment_identifiers()[i]
                        for k in ref_sel.experiment_identifiers().keys():
                            del ref_sel.experiment_identifiers()[k]
                        ref_sel["id"] = flex.int(ref_sel.size(), 0)
                        ref_sel.experiment_identifiers()[0] = identifier
                    else:
                        ref_sel["id"] = flex.int(len(ref_sel), 0)
                    ref_sel["imageset_id"] = flex.int(len(ref_sel), 0)
                    ref_sel.as_file(reflections_filename)

        return