Example #1
0
def run(mtz, bs):
    # Open mtz
    mtz_file = iotbx.mtz.object(mtz)
    miller_arrays = mtz_file.as_miller_arrays()
    print "Opening", mtz

    for b in bs:
        mtz_out = os.path.splitext(os.path.basename(args[1]))[0] + "_b%.2f.mtz" % b
        mtz_dataset = None
        labels = ["H", "K", "L"]
        
        for i, ar in enumerate(miller_arrays):
            fake_label = 2 * string.uppercase[i]
            for lab in guess_array_output_labels(ar):
                labels.append(lab)
            array_types = get_original_array_types(mtz_file, ar.info().labels)
            default_types = iotbx.mtz.default_column_types(ar)
            if len(default_types) == len(array_types):
                column_types = array_types
            else:
                column_types = None

            if ar.is_xray_data_array() or ar.is_complex_array():
                fac = 2. if ar.is_xray_intensity_array() else 1.
                print "Applying B=%.2f to %s" % (b*fac, ar.info())
                k = flex.exp(-ar.d_star_sq().data() * b*fac)
                ar = ar.customized_copy(data=ar.data()*k, sigmas=ar.sigmas()*k if ar.sigmas() else None)

            mtz_dataset = add_array_to_mtz_dataset(mtz_dataset, ar, fake_label,
                                                   column_types)

        # Decide labels and write mtz file
        mtz_object = mtz_dataset.mtz_object()
        invalid_chars = re.compile("[^A-Za-z0-9_\-+\(\)]")

        used = dict([ (label, 0) for label in labels ])

        for i, column in enumerate(mtz_object.columns()):
            if column.label() != labels[i] :
                label = labels[i]
                original_label = label

                assert used[label] == 0

                try:
                    column.set_label(label)
                except RuntimeError, e:
                    if ("new_label is used already" in str(e)) :
                        col_names = [ col.label() for col in mtz_object.columns() ]
                        raise RuntimeError(("Duplicate column label '%s': current labels "+
                                            "are %s; user-specified output labels are %s.") %
                                           (label, " ".join(col_names), " ".join(labels)))
                else:
                    used[original_label] += 1

        mtz_object.write(file_name=mtz_out)

        print
        print "Writing:", mtz_out
        print
def run(mtz, mtz_out, ranges):
    # Open mtz
    mtz_file = iotbx.mtz.object(mtz)
    miller_arrays = mtz_file.as_miller_arrays()
    print "Opening", mtz

    mtz_dataset = None
    labels = ["H", "K", "L"]

    for i, ar in enumerate(miller_arrays):
        d_spacings = ar.d_spacings().data()
        sel = flex.bool(d_spacings.size(), True)
        for d1, d2 in ranges:
            dmax, dmin = max(d1, d2), min(d1, d2)
            tmp = d_spacings > dmax
            tmp |= dmin > d_spacings
            sel &= tmp

        print "%d reflections removed from %s" % (sum(~sel),
                                                  ar.info().label_string())
        fake_label = 2 * string.uppercase[i]
        for lab in guess_array_output_labels(ar):
            labels.append(lab)
        array_types = get_original_array_types(mtz_file, ar.info().labels)
        default_types = iotbx.mtz.default_column_types(ar)
        if len(default_types) == len(array_types):
            column_types = array_types
        else:
            column_types = None

        mtz_dataset = add_array_to_mtz_dataset(mtz_dataset, ar.select(sel),
                                               fake_label, column_types)

    # Decide labels and write mtz file
    mtz_object = mtz_dataset.mtz_object()
    invalid_chars = re.compile("[^A-Za-z0-9_\-+\(\)]")

    used = dict([(label, 0) for label in labels])

    for i, column in enumerate(mtz_object.columns()):
        if column.label() != labels[i]:
            label = labels[i]
            original_label = label

            assert used[label] == 0

            try:
                column.set_label(label)
            except RuntimeError, e:
                if ("new_label is used already" in str(e)):
                    col_names = [col.label() for col in mtz_object.columns()]
                    raise RuntimeError(
                        ("Duplicate column label '%s': current labels " +
                         "are %s; user-specified output labels are %s.") %
                        (label, " ".join(col_names), " ".join(labels)))
            else:
                used[original_label] += 1
def run(mtz, mtz_out, ranges):
    # Open mtz
    mtz_file = iotbx.mtz.object(mtz)
    miller_arrays = mtz_file.as_miller_arrays()
    print "Opening", mtz

    mtz_dataset = None
    labels = ["H", "K", "L"]

    for i, ar in enumerate(miller_arrays):
        d_spacings = ar.d_spacings().data()
        sel = flex.bool(d_spacings.size(), True)
        for d1, d2 in ranges:
            dmax, dmin = max(d1,d2), min(d1,d2)
            tmp = d_spacings > dmax
            tmp |= dmin > d_spacings
            sel &= tmp

        print "%d reflections removed from %s" % (sum(~sel), ar.info().label_string())
        fake_label = 2 * string.uppercase[i]
        for lab in guess_array_output_labels(ar):
            labels.append(lab)
        array_types = get_original_array_types(mtz_file, ar.info().labels)
        default_types = iotbx.mtz.default_column_types(ar)
        if len(default_types) == len(array_types):
            column_types = array_types
        else:
            column_types = None

        mtz_dataset = add_array_to_mtz_dataset(mtz_dataset, ar.select(sel), fake_label,
                                               column_types)


    # Decide labels and write mtz file
    mtz_object = mtz_dataset.mtz_object()
    invalid_chars = re.compile("[^A-Za-z0-9_\-+\(\)]")

    used = dict([ (label, 0) for label in labels ])

    for i, column in enumerate(mtz_object.columns()):
        if column.label() != labels[i] :
            label = labels[i]
            original_label = label

            assert used[label] == 0

            try:
                column.set_label(label)
            except RuntimeError, e:
                if ("new_label is used already" in str(e)) :
                    col_names = [ col.label() for col in mtz_object.columns() ]
                    raise RuntimeError(("Duplicate column label '%s': current labels "+
                                        "are %s; user-specified output labels are %s.") %
                                       (label, " ".join(col_names), " ".join(labels)))
            else:
                used[original_label] += 1
Example #4
0
}""")
    params = master_phil.fetch(source=new_phil).extract()
    try:
        miller_arrays = run_and_reload(params, "tst6.mtz")
    except Sorry, e:
        assert ("Five columns" in str(e))
    else:
        raise Exception_expected
    #params.mtz_file.miller_array[1].output_labels = ["F","SIGF"]
    #params.mtz_file.miller_array
    #print miller_arrays[1].info().label_string()
    params.mtz_file.miller_array[1].output_labels.append("ISYM")
    miller_arrays = run_and_reload(params, "tst6.mtz")
    arrays = mtz3.mtz_object().as_miller_arrays()
    assert (arrays[1].is_xray_reconstructed_amplitude_array())
    labels = reflection_file_editor.guess_array_output_labels(arrays[1])
    assert (labels == ["F", "SIGF", "DANO", "SIGDANO", "ISYM"])
    # now merged
    params.mtz_file.miller_array[1].output_labels = ["F", "SIGF"]
    params.mtz_file.miller_array[1].anomalous_data = "merged"
    miller_arrays = run_and_reload(params, "tst6.mtz")
    assert (miller_arrays[1].is_xray_amplitude_array())
    assert (miller_arrays[1].info().label_string() == "F,SIGF")
    # handle duplicate array labels
    params.mtz_file.output_file = "tst7.mtz"
    params.mtz_file.miller_array[1].anomalous_data = "Auto"
    params.mtz_file.miller_array[1].file_name = "tst_data.mtz"
    params.mtz_file.miller_array[1].labels = \
      "I-obs(+),SIGI-obs(+),I-obs(-),SIGI-obs(-)"
    params.mtz_file.miller_array[1].output_labels = \
      "I-obs(+) SIGI-obs(+) I-obs(-) SIGI-obs(-)".split()
Example #5
0
def run(mtz, bs):
    # Open mtz
    mtz_file = iotbx.mtz.object(mtz)
    miller_arrays = mtz_file.as_miller_arrays()
    print "Opening", mtz

    for b in bs:
        mtz_out = os.path.splitext(os.path.basename(
            args[1]))[0] + "_b%.2f.mtz" % b
        mtz_dataset = None
        labels = ["H", "K", "L"]

        for i, ar in enumerate(miller_arrays):
            fake_label = 2 * string.uppercase[i]
            for lab in guess_array_output_labels(ar):
                labels.append(lab)
            array_types = get_original_array_types(mtz_file, ar.info().labels)
            default_types = iotbx.mtz.default_column_types(ar)
            if len(default_types) == len(array_types):
                column_types = array_types
            else:
                column_types = None

            if ar.is_xray_data_array() or ar.is_complex_array():
                fac = 2. if ar.is_xray_intensity_array() else 1.
                print "Applying B=%.2f to %s" % (b * fac, ar.info())
                k = flex.exp(-ar.d_star_sq().data() * b * fac)
                ar = ar.customized_copy(data=ar.data() * k,
                                        sigmas=ar.sigmas() *
                                        k if ar.sigmas() else None)

            mtz_dataset = add_array_to_mtz_dataset(mtz_dataset, ar, fake_label,
                                                   column_types)

        # Decide labels and write mtz file
        mtz_object = mtz_dataset.mtz_object()
        invalid_chars = re.compile("[^A-Za-z0-9_\-+\(\)]")

        used = dict([(label, 0) for label in labels])

        for i, column in enumerate(mtz_object.columns()):
            if column.label() != labels[i]:
                label = labels[i]
                original_label = label

                assert used[label] == 0

                try:
                    column.set_label(label)
                except RuntimeError, e:
                    if ("new_label is used already" in str(e)):
                        col_names = [
                            col.label() for col in mtz_object.columns()
                        ]
                        raise RuntimeError(
                            ("Duplicate column label '%s': current labels " +
                             "are %s; user-specified output labels are %s.") %
                            (label, " ".join(col_names), " ".join(labels)))
                else:
                    used[original_label] += 1

        mtz_object.write(file_name=mtz_out)

        print
        print "Writing:", mtz_out
        print
Example #6
0
def exercise_basic(verbose=False):
    symm = crystal.symmetry(space_group_info=sgtbx.space_group_info("P212121"),
                            unit_cell=uctbx.unit_cell((6, 7, 8, 90, 90, 90)))
    set1 = miller.build_set(crystal_symmetry=symm,
                            anomalous_flag=True,
                            d_min=1.0)
    assert (set1.indices().size() == 341)
    data0 = flex.double(set1.indices().size(), 100.)
    sigmas1 = flex.double(set1.indices().size(), 4.)
    for i in range(10):
        data0[2 + i * 30] = -1
    for i in range(10):
        data0[5 + i * 30] = 7.5
    array0 = set1.array(data=data0, sigmas=sigmas1)
    array0.set_observation_type_xray_intensity()
    flags = array0.generate_r_free_flags(
        use_lattice_symmetry=True).average_bijvoet_mates()
    mtz0 = array0.as_mtz_dataset(column_root_label="I-obs")
    mtz0.add_miller_array(flags, column_root_label="R-free-flags")
    mtz0.mtz_object().write("tst_data.mtz")
    # convert intensities to amplitudes
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst1.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 6,7,8,90,90,90
  miller_array {
    file_name = tst_data.mtz
    labels = I-obs(+),SIGI-obs(+),I-obs(-),SIGI-obs(-)
    output_labels = I-obs(+) SIGI-obs(+) I-obs(-) SIGI-obs(-)
  }
  miller_array {
    file_name = tst_data.mtz
    labels = R-free-flags
    output_labels = R-free-flags
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    log = sys.stdout
    if (not verbose):
        log = null_out()

    def run_and_reload(params, file_name):
        p = reflection_file_editor.process_arrays(params, log=log)
        p.finish()
        mtz_in = file_reader.any_file(file_name)
        miller_arrays = mtz_in.file_object.as_miller_arrays()
        return miller_arrays

    params.mtz_file.miller_array[0].output_as = "amplitudes"
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        assert ("inconsistent with" in str(e)), str(e)
    else:
        raise Exception_expected
    params.mtz_file.miller_array[0].output_labels = [
        'F-obs(+)', 'SIGF-obs(+)', 'F-obs(-)', 'SIGF-obs(-)'
    ]
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].info().labels == [
        'F-obs(+)', 'SIGF-obs(+)', 'F-obs(-)', 'SIGF-obs(-)'
    ])
    assert miller_arrays[0].is_xray_amplitude_array()
    data1 = miller_arrays[0].data()
    assert (flex.min(data1) == 0)
    # now with French-Wilson treatment
    params.mtz_file.miller_array[0].output_as = "amplitudes_fw"
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].info().labels == [
        'F-obs(+)', 'SIGF-obs(+)', 'F-obs(-)', 'SIGF-obs(-)'
    ])
    assert miller_arrays[0].is_xray_amplitude_array()
    assert (len(miller_arrays[0].data()) == 341)
    assert (flex.min(miller_arrays[0].data()) > 0)
    # force data type change
    params.mtz_file.miller_array[0].output_as = "auto"
    params.mtz_file.miller_array[0].force_type = "amplitudes"
    params.mtz_file.miller_array[0].output_labels = [
        'F-obs(+)', 'SIGF-obs(+)', 'F-obs(-)', 'SIGF-obs(-)'
    ]
    params.mtz_file.output_file = "tst2.mtz"
    miller_arrays = run_and_reload(params, "tst2.mtz")
    mtz_orig = file_reader.any_file("tst_data.mtz")
    orig_arrays = mtz_orig.file_server.miller_arrays
    data0 = orig_arrays[0].data()
    data2 = miller_arrays[0].data()
    assert miller_arrays[0].is_xray_amplitude_array()
    assert (data0.all_eq(data2)) and (data0.all_ne(data1))
    # convert to and from anomalous data
    params.mtz_file.output_file = "tst1.mtz"
    params.mtz_file.miller_array[0].force_type = "auto"
    params.mtz_file.miller_array[0].anomalous_data = "merged"
    try:
        run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        assert ("too many column labels" in str(e)), str(e)
    else:
        raise Exception_expected
    params.mtz_file.miller_array[0].output_labels = ["I-obs", "SIGI-obs"]
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (not miller_arrays[0].anomalous_flag())
    params.mtz_file.miller_array[1].anomalous_data = "anomalous"
    try:
        run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        assert ("not specified enough" in str(e)), str(e)
    else:
        raise Exception_expected
    params.mtz_file.miller_array[1].output_labels = [
        'R-free-flags(+)', 'R-free-flags(-)'
    ]
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert miller_arrays[1].anomalous_flag()
    # filter by signal-to-noise ratio
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].filter_by_signal_to_noise = 2.0
    miller_arrays = run_and_reload(params, "tst1.mtz")
    data = miller_arrays[0].data()
    sigmas = miller_arrays[0].sigmas()
    assert (data.size() == 321) and ((data / sigmas).all_ge(2.0))
    # data scaling
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].scale_factor = 2.0
    miller_arrays = run_and_reload(params, "tst1.mtz")
    data2 = miller_arrays[0].data()
    assert approx_equal(flex.max(data2), 2. * flex.max(data))
    # remove negatives
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].remove_negatives = True
    miller_arrays = run_and_reload(params, "tst1.mtz")
    n_refl = miller_arrays[0].data().size()
    assert (n_refl == 331), n_refl
    # scale to maximum value
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].scale_max = 2000.
    miller_arrays = run_and_reload(params, "tst1.mtz")
    data3 = miller_arrays[0].data()
    assert (flex.max(data3) == 2000.), flex.max(data3)
    # apply isotropic B-factor
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].add_b_iso = 20.0
    miller_arrays = run_and_reload(params, "tst1.mtz")
    data_b = miller_arrays[0].data()
    sigmas_b = miller_arrays[0].sigmas()
    assert approx_equal(data_b[0], 72.68358, eps=0.00001)
    # apply anisotropic B-factor
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].add_b_aniso = (20., 20., 20., 0., 0., 0.)
    miller_arrays = run_and_reload(params, "tst1.mtz")
    data_b = miller_arrays[0].data()
    sigmas_b = miller_arrays[0].sigmas()
    assert approx_equal(data_b[0], 72.68358, eps=0.00001)
    # shuffle data randomly
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].shuffle_values = True
    miller_arrays = run_and_reload(params, "tst1.mtz")
    data_shuffled = miller_arrays[0].data()
    assert (not data0.all_eq(data_shuffled))
    params.mtz_file.miller_array[0].reset_values_to = 12345.6
    miller_arrays = run_and_reload(params, "tst1.mtz")
    data_reset = miller_arrays[0].data()
    assert (data_reset.all_eq(data_reset[0]))
    # improper operations on R-free flags
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].scale_factor = None
    params.mtz_file.miller_array[1].output_as = "amplitudes"
    # this one won't actually crash (it will be ignored)
    miller_arrays = run_and_reload(params, "tst1.mtz")
    params.mtz_file.miller_array[1].output_as = "auto"
    params.mtz_file.miller_array[1].force_type = "amplitudes"
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        pass
    else:
        raise Exception_expected
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[1].filter_by_signal_to_noise = 2.0
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        pass
    else:
        raise Exception_expected
    # improper output labels
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[1].output_labels.append("NULL")
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        assert ("number of output labels" in str(e))
    else:
        raise Exception_expected
    params.mtz_file.miller_array[1].output_labels.pop()
    params.mtz_file.miller_array[0].output_labels.pop()
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        assert ("number of output labels" in str(e))
    else:
        raise Exception_expected
    # use column_root_label instead
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[0].column_root_label = "I_tst"
    params.mtz_file.miller_array[1].column_root_label = "FREE"
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].info().label_string() ==
            "I_tst(+),SIGI_tst(+),I_tst(-),SIGI_tst(-)")
    assert (miller_arrays[1].info().label_string() == "FREE")
    # incorrect root label
    params.mtz_file.miller_array[0].column_root_label = "asdf"
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as s:
        assert ("inconsistent" in str(s))
    else:
        raise Exception_expected
    params.mtz_file.miller_array[0].column_root_label = "F_tst"
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as s:
        assert ("inconsistent" in str(s))
    else:
        raise Exception_expected
    # same root label, but now with compatible array type
    params.mtz_file.miller_array[0].output_as = "amplitudes"
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].info().label_string() ==
            "F_tst(+),SIGF_tst(+),F_tst(-),SIGF_tst(-)")
    # R-free label conflicts
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.r_free_flags.force_generate = True
    params.mtz_file.r_free_flags.new_label = "R-free-flags"
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry:
        pass
    else:
        raise Exception_expected
    params.mtz_file.resolve_label_conflicts = True
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[-1].info().label_string() == "R-free-flags_2")
    # resolution filter
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.d_min = 2.0
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert approx_equal(miller_arrays[0].d_min(), 2.0, eps=0.01)
    assert approx_equal(miller_arrays[1].d_min(), 2.0, eps=0.01)
    # resolution filter for a specific array
    params.mtz_file.d_min = None
    params.mtz_file.miller_array[0].d_min = 2.0
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert approx_equal(miller_arrays[0].d_min(), 2.0, eps=0.01)
    assert approx_equal(miller_arrays[1].d_min(), 1.0, eps=0.01)
    # filter by index
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.exclude_reflection.append((1, 1, 1))
    params.mtz_file.exclude_reflection.append((1, 2, 3))
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].indices().size() == (set1.indices().size() - 2))
    # change-of-basis (reindexing)
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.crystal_symmetry.change_of_basis = "b,a,c"
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].unit_cell().parameters() == (7.0, 6.0, 8.0, 90.0,
                                                          90.0, 90.0))
    # again, with bad operator
    params.mtz_file.crystal_symmetry.change_of_basis = "P222"
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry:
        pass
    else:
        raise Exception_expected
    # r-free flags in same ASU
    params.mtz_file.crystal_symmetry.change_of_basis = "l,k,-h"
    params.mtz_file.output_file = "tst_c_o_b.mtz"
    miller_arrays = run_and_reload(params, "tst_c_o_b.mtz")
    assert (miller_arrays[0].completeness(use_binning=False) ==
            miller_arrays[1].completeness(use_binning=False) == 1.0)
    # expand symmetry (expand_to_p1=True)
    params.mtz_file.output_file = "tst1.mtz"
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.crystal_symmetry.expand_to_p1 = True
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].space_group_info().type().number() == 1)
    # change space group without changing anything else
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.crystal_symmetry.output_space_group = \
      sgtbx.space_group_info("P21212")
    miller_arrays = run_and_reload(params, "tst1.mtz")
    ma_new = miller_arrays[0].customized_copy(crystal_symmetry=array0)
    ma_new, array_orig = ma_new.common_sets(other=array0)
    assert ma_new.sigmas().all_eq(array_orig.sigmas())
    # expand symmetry (different output space group)
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.crystal_symmetry.output_space_group = \
      sgtbx.space_group_info("P21")
    miller_arrays = run_and_reload(params, "tst1.mtz")
    assert (miller_arrays[0].space_group_info().type().number() == 4)
    # incompatible input space_group/unit_cell
    params.mtz_file.crystal_symmetry.space_group = sgtbx.space_group_info("P4")
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        assert ("Input unit cell" in str(e))
    else:
        raise Exception_expected
    # incompatible output space_group/unit_cell
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.crystal_symmetry.output_space_group = \
      sgtbx.space_group_info("P6322")
    try:
        miller_arrays = run_and_reload(params, "tst1.mtz")
    except Sorry as e:
        assert ("incompatible with the specified" in str(e))
    else:
        raise Exception_expected
    # R-free manipulation (starting from incomplete flags)
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array[1].d_min = 1.2
    params.mtz_file.output_file = "tst_data2.mtz"
    miller_arrays = run_and_reload(params, "tst_data2.mtz")
    assert (miller_arrays[1].data().size() == 138)
    flags1 = miller_arrays[1].data()
    params = master_phil.fetch(source=new_phil).extract()
    # now extend the incomplete flags
    params.mtz_file.r_free_flags.extend = True
    for ma in params.mtz_file.miller_array:
        ma.file_name = "tst_data2.mtz"
    params.mtz_file.output_file = "tst4a.mtz"
    miller_arrays = run_and_reload(params, "tst4a.mtz")
    assert (miller_arrays[1].data().size() == 221)
    params.mtz_file.r_free_flags.relative_to_complete_set = True
    miller_arrays = run_and_reload(params, "tst4a.mtz")
    assert (miller_arrays[1].data().size() == 222)
    # and now to arbitrarily high resolution
    params.mtz_file.r_free_flags.relative_to_complete_set = False
    params.mtz_file.d_min = 0.8
    miller_arrays = run_and_reload(params, "tst4a.mtz")
    assert (miller_arrays[1].data().size() == 221)
    params.mtz_file.r_free_flags.relative_to_complete_set = True
    miller_arrays = run_and_reload(params, "tst4a.mtz")
    assert (miller_arrays[1].data().size() == 428)
    # export for ccp4 programs (flag=0, everything else > 0)
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.r_free_flags.export_for_ccp4 = True
    params.mtz_file.r_free_flags.preserve_input_values = False
    params.mtz_file.output_file = "tst_data3.mtz"
    miller_arrays = run_and_reload(params, "tst_data3.mtz")
    f_obs_orig = miller_arrays[1]
    free_selection = (miller_arrays[1].data() == 0)
    old_selection = (orig_arrays[1].data() == 1)
    assert (free_selection.all_eq(old_selection))
    # preserve input values (now in tst_data3.mtz)
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.output_file = "tst4.mtz"
    for ma in params.mtz_file.miller_array:
        ma.file_name = "tst_data3.mtz"
    # flags will be preserved here...
    miller_arrays = run_and_reload(params, "tst4.mtz")
    new_selection = (miller_arrays[1].data() == 0)
    assert (free_selection.all_eq(new_selection))
    # ...and here... [extending a CCP4 test set]
    params.mtz_file.miller_array[1].d_min = 1.2
    params.mtz_file.output_file = "tst_data4.mtz"
    truncated_arrays = run_and_reload(params, "tst_data4.mtz")
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.r_free_flags.extend = True
    for ma in params.mtz_file.miller_array:
        ma.file_name = "tst_data4.mtz"
    params.mtz_file.output_file = "tst4.mtz"
    miller_arrays = run_and_reload(params, "tst4.mtz")
    common_flags = truncated_arrays[1].common_set(other=miller_arrays[1])
    common_flags2 = miller_arrays[1].common_set(other=truncated_arrays[1])
    assert (common_flags2.data().all_eq(common_flags.data()))
    # ...but not here
    for ma in params.mtz_file.miller_array:
        ma.file_name = "tst_data3.mtz"
    params.mtz_file.output_file = "tst4.mtz"
    params.mtz_file.miller_array[1].d_min = None
    params.mtz_file.r_free_flags.preserve_input_values = False
    miller_arrays = run_and_reload(params, "tst4.mtz")
    new_selection = (miller_arrays[1].common_set(other=f_obs_orig).data() == 1)
    assert (free_selection.all_eq(new_selection))
    #
    # XXX note that the tests for adjust_fraction in cctbx.r_free_utils are
    # much more rigorous, because they use a larger test set - I keep it small
    # here to save on speed (especially I/O time).
    #
    # expand the test set
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.output_file = "tst4.mtz"
    params.mtz_file.r_free_flags.fraction = 0.15
    params.mtz_file.r_free_flags.adjust_fraction = True
    try:
        miller_arrays = run_and_reload(params, "tst4.mtz")
    except Sorry as s:
        assert ("resizing" in str(s))
    else:
        raise Exception_expected
    params.mtz_file.r_free_flags.preserve_input_values = False
    miller_arrays = run_and_reload(params, "tst4.mtz")
    flags_expanded = miller_arrays[-1]
    fraction_free = flags_expanded.data().count(
        1) / flags_expanded.data().size()
    assert (fraction_free > 0.145) and (fraction_free < 0.155)
    # now shrink it
    params.mtz_file.r_free_flags.fraction = 0.05
    miller_arrays = run_and_reload(params, "tst4.mtz")
    flags_shrunken = miller_arrays[-1]
    fraction_free = flags_shrunken.data().count(
        1) / flags_shrunken.data().size()
    assert (fraction_free > 0.049) and (fraction_free < 0.051)
    # more R-free manipulations
    mtz2 = array0.as_mtz_dataset(column_root_label="I-obs")
    flags2 = flags.generate_bijvoet_mates()
    mtz2.add_miller_array(flags2, column_root_label="R-free-flags")
    mtz2.mtz_object().write("tst_data4.mtz")
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst5.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 6,7,8,90,90,90
  miller_array {
    file_name = tst_data4.mtz
    labels = I-obs(+),SIGI-obs(+),I-obs(-),SIGI-obs(-)
    output_labels = I-obs(+) SIGI-obs(+) I-obs(-) SIGI-obs(-)
  }
  miller_array {
    file_name = tst_data4.mtz
    labels = R-free-flags(+),R-free-flags(-)
    output_labels = R-free-flags
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    miller_arrays = run_and_reload(params, "tst5.mtz")
    assert ((miller_arrays[0].anomalous_flag())
            and (not miller_arrays[1].anomalous_flag()))
    # flags all the same value
    mtz2 = array0.as_mtz_dataset(column_root_label="I-obs")
    flags2 = flags.generate_bijvoet_mates()
    flags2 = flags2.customized_copy(data=flex.int(flags2.data().size(), 1))
    mtz2.add_miller_array(flags2, column_root_label="R-free-flags")
    mtz2.mtz_object().write("tst_data4.mtz")
    try:
        miller_arrays = run_and_reload(params, "tst5.mtz")
    except Sorry as s:
        pass
    else:
        raise Exception_expected
    # now force them through (no conversion to flex.bool)
    params.mtz_file.r_free_flags.extend = False
    params.mtz_file.r_free_flags.warn_if_all_same_value = False
    miller_arrays = run_and_reload(params, "tst5.mtz")
    assert miller_arrays[1].data().all_eq(1)
    # reconstructed amplitudes, yuck
    mtz3 = array0.as_mtz_dataset(column_root_label="I-obs")
    indices = array0.average_bijvoet_mates().indices()
    # XXX why does this come out as an unmerged array?
    mtz3.add_column(label="F",
                    type="F").set_reals(miller_indices=indices,
                                        data=flex.double(indices.size(), 100.))
    mtz3.add_column(label="SIGF",
                    type="Q").set_reals(miller_indices=indices,
                                        data=flex.double(indices.size(), 5.))
    mtz3.add_column(label="DANO",
                    type="D").set_reals(miller_indices=indices,
                                        data=flex.double(indices.size(), 10.))
    mtz3.add_column(label="SIGDANO",
                    type="Q").set_reals(miller_indices=indices,
                                        data=flex.double(indices.size(), 4.))
    mtz3.mtz_object().write("tst_data5.mtz")
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst6.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 6,7,8,90,90,90
  miller_array {
    file_name = tst_data5.mtz
    labels = I-obs(+),SIGI-obs(+),I-obs(-),SIGI-obs(-)
    output_labels = I-obs(+) SIGI-obs(+) I-obs(-) SIGI-obs(-)
  }
  miller_array {
    file_name = tst_data5.mtz
    labels = F,SIGF,DANO,SIGDANO,merged
    output_labels = F SIGF DANO SIGDANO
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    try:
        miller_arrays = run_and_reload(params, "tst6.mtz")
    except Sorry as e:
        assert ("Five columns" in str(e))
    else:
        raise Exception_expected
    #params.mtz_file.miller_array[1].output_labels = ["F","SIGF"]
    #params.mtz_file.miller_array
    #print miller_arrays[1].info().label_string()
    params.mtz_file.miller_array[1].output_labels.append("ISYM")
    miller_arrays = run_and_reload(params, "tst6.mtz")
    arrays = mtz3.mtz_object().as_miller_arrays()
    assert (arrays[1].is_xray_reconstructed_amplitude_array())
    labels = reflection_file_editor.guess_array_output_labels(arrays[1])
    assert (labels == ["F", "SIGF", "DANO", "SIGDANO", "ISYM"])
    # now merged
    params.mtz_file.miller_array[1].output_labels = ["F", "SIGF"]
    params.mtz_file.miller_array[1].anomalous_data = "merged"
    miller_arrays = run_and_reload(params, "tst6.mtz")
    assert (miller_arrays[1].is_xray_amplitude_array())
    assert (miller_arrays[1].info().label_string() == "F,SIGF")
    # handle duplicate array labels
    params.mtz_file.output_file = "tst7.mtz"
    params.mtz_file.miller_array[1].anomalous_data = "Auto"
    params.mtz_file.miller_array[1].file_name = "tst_data.mtz"
    params.mtz_file.miller_array[1].labels = \
      "I-obs(+),SIGI-obs(+),I-obs(-),SIGI-obs(-)"
    params.mtz_file.miller_array[1].output_labels = \
      "I-obs(+) SIGI-obs(+) I-obs(-) SIGI-obs(-)".split()
    try:
        miller_arrays = run_and_reload(params, "tst7.mtz")
    except Sorry as e:
        assert ("Duplicate column label 'I-obs(+)'" in str(e))
    else:
        raise Exception_expected
    params.mtz_file.resolve_label_conflicts = True
    miller_arrays = run_and_reload(params, "tst7.mtz")
    assert (miller_arrays[1].info().label_string() ==
            "I-obs_2(+),SIGI-obs_2(+),I-obs_2(-),SIGI-obs_2(-)")
    # bad output path
    params.mtz_file.output_file = os.path.join("/foo", "bar", str(os.getpid()),
                                               "data.mtz")
    try:
        miller_arrays = run_and_reload(params, "tst6.mtz")
    except Sorry as e:
        pass
    else:
        raise Exception_expected
    # no input arrays
    params = master_phil.fetch(source=new_phil).extract()
    params.mtz_file.miller_array = []
    try:
        miller_arrays = run_and_reload(params, "tst6.mtz")
    except Sorry as e:
        pass
    else:
        raise Exception_expected
    # eliminate_sys_absent
    symm2 = symm.customized_copy(
        space_group_info=sgtbx.space_group_info("P222"))
    set2 = miller.build_set(crystal_symmetry=symm2,
                            anomalous_flag=True,
                            d_min=1.0)
    data2 = flex.double(set2.indices().size(), 100.)
    sigmas2 = flex.double(set2.indices().size(), 4.)
    array2 = set2.array(data=data2, sigmas=sigmas2)
    array2.set_observation_type_xray_amplitude()
    flags2 = array2.generate_r_free_flags(
        use_lattice_symmetry=True).average_bijvoet_mates()
    mtz2 = array2.as_mtz_dataset(column_root_label="F-obs", wavelength=1.54)
    mtz2.add_miller_array(flags2, column_root_label="R-free-flags")
    mtz2.mtz_object().write("tst_data8.mtz")
    assert (array2.indices().size() == 352)
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst8.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 6,7,8,90,90,90
  miller_array {
    file_name = tst_data8.mtz
    labels = F-obs(+),SIGF-obs(+),F-obs(-),SIGF-obs(-)
    output_labels = F-obs(+) SIGF-obs(+) F-obs(-) SIGF-obs(-)
  }
  miller_array {
    file_name = tst_data8.mtz
    labels = R-free-flags
    output_labels = R-free-flags
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    miller_arrays = run_and_reload(params, "tst8.mtz")
    assert (miller_arrays[0].indices().size() == 341)
    params.mtz_file.crystal_symmetry.eliminate_sys_absent = False
    miller_arrays = run_and_reload(params, "tst8.mtz")
    assert (miller_arrays[0].indices().size() == 352)
    # wavelength
    assert approx_equal(miller_arrays[0].info().wavelength, 1.54)
    params.mtz_file.wavelength = 1.116
    params.mtz_file.output_file = "tst9.mtz"
    miller_arrays = run_and_reload(params, "tst9.mtz")
    assert approx_equal(miller_arrays[0].info().wavelength, 1.116)
    # second MTZ file with wavelength set by default to 1.0 (and ignored)
    mtz3 = array2.average_bijvoet_mates().as_mtz_dataset(
        column_root_label="F-obs_2", wavelength=None)
    mtz3.add_miller_array(flags2, column_root_label="R-free-flags")
    mtz3.mtz_object().write("tst_data9.mtz")
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst9.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 6,7,8,90,90,90
  miller_array {
    file_name = tst_data8.mtz
    labels = F-obs(+),SIGF-obs(+),F-obs(-),SIGF-obs(-)
    output_labels = F-obs(+) SIGF-obs(+) F-obs(-) SIGF-obs(-)
  }
  miller_array {
    file_name = tst_data9.mtz
    labels = F-obs_2,SIGF-obs_2
    output_labels = F-obs_2 SIGF-obs_2
  }
  miller_array {
    file_name = tst_data8.mtz
    labels = R-free-flags
    output_labels = R-free-flags
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    miller_arrays = run_and_reload(params, "tst9.mtz")
    assert approx_equal(miller_arrays[0].info().wavelength, 1.54)
    # now try two MTZ files with wavelengths not equal to 1.0
    mtz3 = array2.average_bijvoet_mates().as_mtz_dataset(
        column_root_label="F-obs_2", wavelength=1.116)
    mtz3.add_miller_array(flags2, column_root_label="R-free-flags")
    mtz3.mtz_object().write("tst_data9.mtz")
    try:
        miller_arrays = run_and_reload(params, "tst9.mtz")
    except Sorry as s:
        assert (
            str(s) ==
            """Multiple wavelengths present in input experimental data arrays: 1.116, 1.54.  Please specify the wavelength parameter explicitly."""
        )
    else:
        raise Exception_expected
    params.mtz_file.wavelength = 1.54
    miller_arrays = run_and_reload(params, "tst9.mtz")
    assert approx_equal(miller_arrays[0].info().wavelength, 1.54)
    # Make sure wavelengths from non-experimental data arrays are ignored
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst9.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 6,7,8,90,90,90
  miller_array {
    file_name = tst_data8.mtz
    labels = F-obs(+),SIGF-obs(+),F-obs(-),SIGF-obs(-)
    output_labels = F-obs(+) SIGF-obs(+) F-obs(-) SIGF-obs(-)
  }
  miller_array {
    file_name = tst_data9.mtz
    labels = R-free-flags
    output_labels = R-free-flags
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    miller_arrays = run_and_reload(params, "tst9.mtz")
    assert approx_equal(miller_arrays[0].info().wavelength, 1.54)
    # map coefficients
    n_refl = len(array2.indices())
    d1 = flex.random_double(n_refl)
    d2 = flex.random_double(n_refl)
    coeffs_data = flex.complex_double(d1, d2)
    map_coeffs = array2.customized_copy(data=coeffs_data,
                                        sigmas=None).average_bijvoet_mates()
    map_coeffs.as_mtz_dataset(
        column_root_label="2FOFCWT").mtz_object().write("tst_data10.mtz")
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst10.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 6,7,8,90,90,90
  miller_array {
    file_name = tst_data10.mtz
    labels = 2FOFCWT,PHI2FOFCWT
    output_labels = 2FOFCWT PHI2FOFCWT
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    miller_arrays = run_and_reload(params, "tst10.mtz")
    # R-free mismatches
    symm = crystal.symmetry(space_group_info=sgtbx.space_group_info("P212121"),
                            unit_cell=uctbx.unit_cell(
                                (60, 70, 80, 90, 90, 90)))
    set1 = miller.build_set(crystal_symmetry=symm,
                            anomalous_flag=True,
                            d_min=1.0)
    flags = set1.generate_r_free_flags(fraction=0.1, max_free=None)
    n_flipped = 0
    for i_hkl, (h, k, l) in enumerate(flags.indices()):
        if (i_hkl % 100 == 0) and (h > 0) and (k > 0) and (l > 0):
            flag = flags.data()[i_hkl]
            if (not flag):
                n_flipped += 1
                flags.data()[i_hkl] = True
    assert (n_flipped > 0)
    flags.as_mtz_dataset(
        column_root_label="FreeR_flag").mtz_object().write("tst_data11.mtz")
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst11.mtz
  crystal_symmetry.space_group = P212121
  crystal_symmetry.unit_cell = 60,70,80,90,90,90
  miller_array {
    file_name = tst_data11.mtz
    labels = FreeR_flag(+),FreeR_flag(-)
    output_labels = FreeR_flag
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    try:
        miller_arrays = run_and_reload(params, "tst11.mtz")
    except Sorry:
        pass
    else:
        raise Exception_expected
    params.mtz_file.r_free_flags.remediate_mismatches = True
    params.mtz_file.r_free_flags.preserve_input_values = False
    miller_arrays = run_and_reload(params, "tst11.mtz")
    # R-free flags extension - corner case where (1,1,0) was being missed
    symm = crystal.symmetry(unit_cell=(45.23, 51.24, 80.49, 90, 90, 90),
                            space_group_symbol="P212121")
    ms = miller.build_set(crystal_symmetry=symm,
                          d_min=1.67290,
                          anomalous_flag=True).resolution_filter(d_max=33.9095)
    ma = ms.array(data=flex.double(ms.size(), 100.0),
                  sigmas=flex.double(ms.size(), 1.0))
    #FIXME check on other systems
    #assert (ma.average_bijvoet_mates().size() == 22238)
    ma.set_observation_type_xray_intensity()
    ma.export_as_scalepack_unmerged(file_name="tst_data12.sca")
    flags = ma.generate_r_free_flags().average_bijvoet_mates()
    cs2 = symm.customized_copy(unit_cell=(45.215, 51.203, 80.425, 90, 90, 90))
    flags = flags.customized_copy(crystal_symmetry=cs2).resolution_filter(
        d_max=24.396, d_min=1.94701)
    flags.as_mtz_dataset(
        column_root_label="FreeR_flag").mtz_object().write("tst_flags.mtz")
    new_phil = libtbx.phil.parse("""
mtz_file {
  output_file = tst12.mtz
  crystal_symmetry {
    unit_cell = 45.215 51.203 80.425 90 90 90
    space_group = P 21 21 21
    output_unit_cell = 45.23 51.24 80.49 90 90 90
  }
  miller_array {
    file_name = tst_data12.sca
    labels = I(+),SIGI(+),I(-),SIGI(-),merged
    output_labels = I(+) SIGI(+) I(-) SIGI(-)
  }
  miller_array {
    file_name = tst_flags.mtz
    labels = FreeR_flag
    output_labels = FreeR_flag
  }
  r_free_flags {
    extend = True
    relative_to_complete_set = True
  }
}""")
    params = master_phil.fetch(source=new_phil).extract()
    miller_arrays = run_and_reload(params, "tst12.mtz")
    f_obs, r_free_flags = miller_arrays
    ls = f_obs.average_bijvoet_mates().lone_set(other=r_free_flags)
    assert (ls.size() == 0)
}""")
  params = master_phil.fetch(source=new_phil).extract()
  try :
    miller_arrays = run_and_reload(params, "tst6.mtz")
  except Sorry, e :
    assert ("Five columns" in str(e))
  else :
    raise Exception_expected
  #params.mtz_file.miller_array[1].output_labels = ["F","SIGF"]
  #params.mtz_file.miller_array
  #print miller_arrays[1].info().label_string()
  params.mtz_file.miller_array[1].output_labels.append("ISYM")
  miller_arrays = run_and_reload(params, "tst6.mtz")
  arrays = mtz3.mtz_object().as_miller_arrays()
  assert (arrays[1].is_xray_reconstructed_amplitude_array())
  labels = reflection_file_editor.guess_array_output_labels(arrays[1])
  assert (labels == ["F","SIGF","DANO","SIGDANO","ISYM"])
  # now merged
  params.mtz_file.miller_array[1].output_labels = ["F","SIGF"]
  params.mtz_file.miller_array[1].anomalous_data = "merged"
  miller_arrays = run_and_reload(params, "tst6.mtz")
  assert (miller_arrays[1].is_xray_amplitude_array())
  assert (miller_arrays[1].info().label_string() == "F,SIGF")
  # handle duplicate array labels
  params.mtz_file.output_file = "tst7.mtz"
  params.mtz_file.miller_array[1].anomalous_data = "Auto"
  params.mtz_file.miller_array[1].file_name = "tst_data.mtz"
  params.mtz_file.miller_array[1].labels = \
    "I-obs(+),SIGI-obs(+),I-obs(-),SIGI-obs(-)"
  params.mtz_file.miller_array[1].output_labels = \
    "I-obs(+) SIGI-obs(+) I-obs(-) SIGI-obs(-)".split()