Ejemplo n.º 1
0
def print_validation(log, results, debug, pdb_hierarchy_selected):
    """Write the polder validation report to ``log`` and return its verdict.

    The three boxed maps in ``results`` are compared on the grid points that
    ``maptbx.grid_indices_around_sites`` selects around the sites of the
    box_1 model, using a radius of 2.0 for every site:

      Map 1: calculated Fobs with ligand
      Map 2: calculated Fobs without ligand
      Map 3: real Fobs data

    The report lists the pairwise linear correlation coefficients of the
    selected values, the same coefficients after ``maptbx.volume_scale_1d``
    rescaling (printed under "Peak CC"), and the discrepancy function D for
    a series of cutoffs q.

    Parameters:
      log: stream the report is printed to.
      results: object with attributes box_1, box_2 and box_3; every box
        provides map_box, and box_1 additionally xray_structure_box and
        box_crystal_symmetry.
      debug: if true, also write the three boxed maps
        (box_<N>_polder.ccp4) and the boxed model (box_polder.pdb), using
        relative file names.
      pdb_hierarchy_selected: hierarchy that adopts the box_1 structure
        before it is written; only touched when ``debug`` is true.

    Returns:
      The message produced by result_message() from the three raw CCs.
    """
    box_1 = results.box_1
    box_2 = results.box_2
    box_3 = results.box_3
    # The selection is derived from box_1 only and then applied to all three
    # map boxes, so the compared value arrays are index-aligned.
    sites_cart_box = box_1.xray_structure_box.sites_cart()
    sel = maptbx.grid_indices_around_sites(
        unit_cell=box_1.xray_structure_box.unit_cell(),
        fft_n_real=box_1.map_box.focus(),
        fft_m_real=box_1.map_box.all(),
        sites_cart=sites_cart_box,
        site_radii=flex.double(sites_cart_box.size(), 2.0))
    b1 = box_1.map_box.select(sel).as_1d()
    b2 = box_2.map_box.select(sel).as_1d()
    b3 = box_3.map_box.select(sel).as_1d()
    print >> log, "Map 1: calculated Fobs with ligand"
    print >> log, "Map 2: calculated Fobs without ligand"
    print >> log, "Map 3: real Fobs data"
    # Raw correlations; these (not the rescaled ones) feed result_message().
    cc12 = flex.linear_correlation(x=b1, y=b2).coefficient()
    cc13 = flex.linear_correlation(x=b1, y=b3).coefficient()
    cc23 = flex.linear_correlation(x=b2, y=b3).coefficient()
    print >> log, "CC(1,2): %6.4f" % cc12
    print >> log, "CC(1,3): %6.4f" % cc13
    print >> log, "CC(2,3): %6.4f" % cc23
    # D-function: from here on b1..b3 hold the volume-scaled values.
    b1 = maptbx.volume_scale_1d(map=b1, n_bins=10000).map_data()
    b2 = maptbx.volume_scale_1d(map=b2, n_bins=10000).map_data()
    b3 = maptbx.volume_scale_1d(map=b3, n_bins=10000).map_data()
    print >> log, "Peak CC:"
    print >> log, "CC(1,2): %6.4f" % flex.linear_correlation(
        x=b1, y=b2).coefficient()
    print >> log, "CC(1,3): %6.4f" % flex.linear_correlation(
        x=b1, y=b3).coefficient()
    print >> log, "CC(2,3): %6.4f" % flex.linear_correlation(
        x=b2, y=b3).coefficient()
    # Cutoffs 0.1 .. 0.9 followed by 0.91 .. 0.99.  Both divisions use a
    # float literal: with Python 2 integer division ``i / 100`` would turn
    # every one of the 0.91 .. 0.99 cutoffs into 0.
    cutoffs = flex.double([i / 10. for i in range(1, 10)] +
                          [i / 100. for i in range(91, 100)])
    d12 = maptbx.discrepancy_function(map_1=b1, map_2=b2, cutoffs=cutoffs)
    d13 = maptbx.discrepancy_function(map_1=b1, map_2=b3, cutoffs=cutoffs)
    d23 = maptbx.discrepancy_function(map_1=b2, map_2=b3, cutoffs=cutoffs)
    print >> log, "q    D(1,2) D(1,3) D(2,3)"
    for c, d12_, d13_, d23_ in zip(cutoffs, d12, d13, d23):
        print >> log, "%4.2f %6.4f %6.4f %6.4f" % (c, d12_, d13_, d23_)
    if (debug):
        # Dump the boxed maps and the matching boxed model for inspection.
        write_map_box(box=box_1, filename="box_1_polder.ccp4")
        write_map_box(box=box_2, filename="box_2_polder.ccp4")
        write_map_box(box=box_3, filename="box_3_polder.ccp4")
        pdb_hierarchy_selected.adopt_xray_structure(box_1.xray_structure_box)
        pdb_hierarchy_selected.write_pdb_file(
            file_name="box_polder.pdb",
            crystal_symmetry=box_1.box_crystal_symmetry)
    print >> log, '*' * 79
    message = result_message(cc12=cc12, cc13=cc13, cc23=cc23)
    print >> log, message
    return message
Ejemplo n.º 2
0
def exercise_cc_peak():
  """
  Exercise maptbx.cc_peak and maptbx.discrepancy_function.

  cc_peak is called on two random real-space maps and on two sets of
  map coefficients; for identical coefficients the result must be 1.
  The discrepancy function of the volume-scaled maps must agree between
  the 3D (volume_scale) and the 1D (volume_scale_1d) code paths.
  """
  n_bins = 10000

  def random_unit_map():
    # Random values rescaled so that min is 0 and max is 1, on a 10x20x30 grid.
    values = flex.double([random.random() for _ in xrange(10*20*30)])
    values = values - flex.min(values)
    values = values / flex.max(values)
    values.resize(flex.grid((10,20,30)))
    return values

  def coefficients(d_min):
    return xray_structure.structure_factors(d_min=d_min).f_calc()

  map_a = random_unit_map()
  map_b = random_unit_map()
  # Smoke test on real-space maps; the result is not checked.
  for step in range(11):
    maptbx.cc_peak(map_1=map_a, map_2=map_b, cutoff=step/10.)
  # Single carbon atom in the middle of a 5x5x5 P1 cell.
  sites_frac = flex.vec3_double([
    (0.50,0.50,0.50)])
  from cctbx import xray
  symmetry = crystal.symmetry(
    unit_cell=(5,5,5,90,90,90),
    space_group_symbol="P1")
  atoms = [
    xray.scatterer(label=str(index), scattering_type="C", site=site)
    for index, site in enumerate(sites_frac)]
  xray_structure = xray.structure(
    crystal_symmetry=symmetry,
    scatterers=flex.xray_scatterer(atoms))
  # Smoke test on map coefficients of different resolution; not checked.
  coeffs_a = coefficients(1.6)
  coeffs_b = coefficients(1.7)
  for step in range(11):
    maptbx.cc_peak(
      map_coeffs_1=coeffs_a, map_coeffs_2=coeffs_b, cutoff=step/10.)
  # Discrepancy function of the volume-scaled 3D maps.
  scaled_a = maptbx.volume_scale(map = map_a,  n_bins = n_bins).map_data()
  scaled_b = maptbx.volume_scale(map = map_b,  n_bins = n_bins).map_data()
  cutoffs = flex.double([i/20. for i in range(1,20)])
  df = maptbx.discrepancy_function(
    map_1=scaled_a, map_2=scaled_b, cutoffs=cutoffs)
  # Identical coefficients: the peak correlation has to be 1 at every cutoff.
  coeffs_a = coefficients(2.2)
  coeffs_b = coefficients(2.2)
  for step in range(10):
    ccp = maptbx.cc_peak(
      map_coeffs_1=coeffs_a, map_coeffs_2=coeffs_b, cutoff=step/10.)
    assert approx_equal(ccp, 1)
  # 1D case: same maps flattened, scaled by the 1D routine.
  scaled_a_1d = maptbx.volume_scale_1d(
    map = map_a.as_1d(),  n_bins = n_bins).map_data()
  scaled_b_1d = maptbx.volume_scale_1d(
    map = map_b.as_1d(),  n_bins = n_bins).map_data()
  df_1d = maptbx.discrepancy_function(
    map_1=scaled_a_1d, map_2=scaled_b_1d, cutoffs=cutoffs)
  assert approx_equal(df, df_1d)
Ejemplo n.º 3
0
def cmd_run(args, validated=False, out=sys.stdout):
    """Command-line driver for phenix.polder.

    Without arguments, prints the program banner, ``legend`` and the default
    parameters to ``out`` and returns None.  Otherwise it processes ``args``,
    locates the model file, the reflection data and the crystal symmetry,
    calls ``compute_polder_map`` and then logs correlation and discrepancy
    statistics that compare three boxed difference maps around the selected
    atoms.

    Parameters:
      args: command-line arguments, handed to
        mmtbx.utils.process_command_line_args.
      validated: when true, the ``validate_params`` call is skipped.
      out: stream for console output; it is registered in a multi_out
        together with the log file "polder.log".

    Returns:
      True after a complete run, None when called without arguments.

    Raises:
      Sorry: if no model file or more than one model file is given, if no
        reflection file is found, if no crystal symmetry can be determined,
        or if the symmetries found in the two files are not similar.
    """
    if (len(args) == 0):
        # No arguments: show banner, legend and default parameters only.
        print >> out, "-" * 79
        print >> out, "                               phenix.polder"
        print >> out, "-" * 79
        print >> out, legend
        print >> out, "-" * 79
        master_params.show(out=out)
        return
    # Everything printed to ``log`` goes both to ``out`` and to polder.log.
    log = multi_out()
    log.register("stdout", out)
    log_file_name = "polder.log"
    # NOTE(review): logfile is never closed explicitly in this function.
    logfile = open(log_file_name, "w")
    log.register("logfile", logfile)
    print >> log, "phenix.polder is running..."
    print >> log, "input parameters:\n", args
    parsed = master_params
    inputs = mmtbx.utils.process_command_line_args(args=args,
                                                   master_params=parsed)
    #inputs.params.show() #check
    params = inputs.params.extract()
    # check model file: a file given on the command line takes the place of
    # params.model_file_name; more than one file is an error
    if len(inputs.pdb_file_names) == 0:
        if (params.model_file_name is None):
            raise Sorry("No model file found.")
    elif (len(inputs.pdb_file_names) == 1):
        params.model_file_name = inputs.pdb_file_names[0]
    else:
        raise Sorry("Only one model file should be given")
    # check reflection file: fall back to params.reflection_file_name when
    # the command line did not provide one
    reflection_files = inputs.reflection_files
    if (len(reflection_files) == 0):
        if (params.reflection_file_name is None):
            raise Sorry("No reflection file found.")
        else:
            hkl_in = file_reader.any_file(params.reflection_file_name,
                                          force_type="hkl")
            hkl_in.assert_file_type("hkl")
            reflection_files = [hkl_in.file_object]
    # crystal symmetry: use the one from the command-line inputs if present,
    # otherwise try to extract it from the model and reflection files
    crystal_symmetry = None
    crystal_symmetry = inputs.crystal_symmetry
    if (crystal_symmetry is None):
        crystal_symmetries = []
        # NOTE(review): str() turns an unset (None) file name into the
        # literal "None" before it is passed to extract_from -- confirm that
        # extract_from copes with that.
        for f in [
                str(params.model_file_name),
                str(params.reflection_file_name)
        ]:
            cs = crystal_symmetry_from_any.extract_from(f)
            if (cs is not None): crystal_symmetries.append(cs)
        if (len(crystal_symmetries) == 1):
            crystal_symmetry = crystal_symmetries[0]
        elif (len(crystal_symmetries) == 0):
            raise Sorry("No crystal symmetry found.")
        else:
            # Both files carry a symmetry: they must be similar, and the
            # first one (from the model file) is used.
            if (not crystal_symmetries[0].is_similar_symmetry(
                    crystal_symmetries[1])):
                raise Sorry(
                    "Crystal symmetry mismatch between different files.")
            crystal_symmetry = crystal_symmetries[0]
    # Extract observed data and free-R flags; the output of the helper
    # routines is sent to throw-away StringIO buffers.
    f_obs, r_free_flags = None, None
    rfs = reflection_file_utils.reflection_file_server(
        crystal_symmetry=crystal_symmetry,
        force_symmetry=True,
        reflection_files=reflection_files,
        err=StringIO())
    parameters = mmtbx.utils.data_and_flags_master_params().extract()
    if (params.data_labels is not None):
        parameters.labels = params.data_labels
    if (params.r_free_flags_labels is not None):
        parameters.r_free_flags.label = params.r_free_flags_labels
    determined_data_and_flags = mmtbx.utils.determine_data_and_flags(
        reflection_file_server=rfs,
        parameters=parameters,
        keep_going=True,
        log=StringIO())
    f_obs = determined_data_and_flags.f_obs
    # Record what was actually used back into params, so that the parameter
    # set shown below reflects the run.
    if (params.data_labels is None):
        params.data_labels = f_obs.info().label_string()
    if (params.reflection_file_name is None):
        params.reflection_file_name = parameters.file_name
    r_free_flags = determined_data_and_flags.r_free_flags
    assert f_obs is not None
    print >> log, "Input data:"
    print >> log, "  Iobs or Fobs:", f_obs.info().labels
    if (r_free_flags is not None):
        print >> log, "  Free-R flags:", r_free_flags.info().labels
        params.r_free_flags_labels = r_free_flags.info().label_string()
    else:
        print >> log, "  Free-R flags: Not present"
    # Default output prefix: the model file name cut at its first "."
    # (the split is applied to the whole path before basename is taken).
    model_basename = os.path.basename(params.model_file_name.split(".")[0])
    if (len(model_basename) > 0 and params.output_file_name_prefix is None):
        params.output_file_name_prefix = model_basename
    # NOTE(review): this bare print and new_params.show() below do not go
    # through ``log`` or ``out`` -- looks like leftover debug output; confirm.
    print params.output_file_name_prefix
    new_params = master_params.format(python_object=params)
    new_params.show()
    if (not validated):
        validate_params(params)
    pdb_input = iotbx.pdb.input(file_name=params.model_file_name)
    pdb_hierarchy = pdb_input.construct_hierarchy()
    xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)
    # DON'T USE:
    # xray_structure = pdb_input.xray_structure_simple()
    # atom order might be wrong
    mmtbx.utils.setup_scattering_dictionaries(
        scattering_table=params.scattering_table,
        xray_structure=xray_structure,
        d_min=f_obs.d_min())
    #if f_obs is not None:
    # Restrict data (and flags) to the requested resolution range.
    f_obs = f_obs.resolution_filter(d_min=params.high_resolution,
                                    d_max=params.low_resolution)
    if (r_free_flags is not None):
        r_free_flags = r_free_flags.resolution_filter(
            d_min=params.high_resolution, d_max=params.low_resolution)
    # Grab case that data are anomalous
    if (f_obs.anomalous_flag()):
        f_obs, r_free_flags = prepare_f_obs_and_flags(
            f_obs=f_obs, r_free_flags=r_free_flags)
    cpm_obj = compute_polder_map(f_obs=f_obs,
                                 r_free_flags=r_free_flags,
                                 xray_structure=xray_structure,
                                 pdb_hierarchy=pdb_hierarchy,
                                 params=params,
                                 log=log)
    # Significance check
    # f_obs_1: |Fmodel| of the full structure (reported as "with ligand").
    fmodel = mmtbx.f_model.manager(f_obs=f_obs,
                                   r_free_flags=r_free_flags,
                                   xray_structure=xray_structure)
    fmodel.update_all_scales(remove_outliers=False, fast=True)
    f_obs_1 = abs(fmodel.f_model())
    # f_obs_2: |Fmodel| after swapping in the structure without the ligand.
    fmodel.update_xray_structure(
        xray_structure=cpm_obj.xray_structure_noligand,
        update_f_calc=True,
        update_f_mask=True,
        force_update_f_mask=True)
    # PVA: do we need it? fmodel.update_all_scales(remove_outliers=False)
    f_obs_2 = abs(fmodel.f_model())
    xrs_selected = cpm_obj.pdb_hierarchy_selected.extract_xray_structure(
        crystal_symmetry=f_obs.crystal_symmetry())
    # f_calc and f_mask are computed once and shared by all three maps;
    # only the "observed" amplitudes differ between them.
    f_calc = f_obs.structure_factors_from_scatterers(
        xray_structure=cpm_obj.xray_structure_noligand).f_calc()
    f_mask = f_obs.structure_factors_from_map(map=cpm_obj.mask_polder,
                                              use_scale=True,
                                              anomalous_flag=False,
                                              use_sg=False)

    def get_poler_diff_map(f_obs):
        # Build a sigma-scaled mFo-DFc map for the given amplitudes and
        # return it boxed around the selected atoms (cushion 2.1).
        fmodel = mmtbx.f_model.manager(f_obs=f_obs,
                                       r_free_flags=r_free_flags,
                                       f_calc=f_calc,
                                       f_mask=f_mask)
        fmodel.update_all_scales(remove_outliers=False)
        mc_diff = map_tools.electron_density_map(
            fmodel=fmodel).map_coefficients(map_type="mFo-DFc",
                                            isotropize=True,
                                            fill_missing=False)
        fft_map = miller.fft_map(crystal_gridding=cpm_obj.crystal_gridding,
                                 fourier_coefficients=mc_diff)
        fft_map.apply_sigma_scaling()
        map_data = fft_map.real_map_unpadded()
        return mmtbx.utils.extract_box_around_model_and_map(
            xray_structure=xrs_selected, map_data=map_data, box_cushion=2.1)

    box_1 = get_poler_diff_map(f_obs=f_obs_1)
    box_2 = get_poler_diff_map(f_obs=f_obs_2)
    box_3 = get_poler_diff_map(f_obs=f_obs)
    # Grid points around the boxed sites (radius 2.0 per site); the same
    # selection is applied to all three boxes.
    sites_cart_box = box_1.xray_structure_box.sites_cart()
    sel = maptbx.grid_indices_around_sites(
        unit_cell=box_1.xray_structure_box.unit_cell(),
        fft_n_real=box_1.map_box.focus(),
        fft_m_real=box_1.map_box.all(),
        sites_cart=sites_cart_box,
        site_radii=flex.double(sites_cart_box.size(), 2.0))
    b1 = box_1.map_box.select(sel).as_1d()
    b2 = box_2.map_box.select(sel).as_1d()
    b3 = box_3.map_box.select(sel).as_1d()
    print >> log, "Map 1: calculated Fobs with ligand"
    print >> log, "Map 2: calculated Fobs without ligand"
    print >> log, "Map 3: real Fobs data"
    print >> log, "CC(1,2): %6.4f" % flex.linear_correlation(
        x=b1, y=b2).coefficient()
    print >> log, "CC(1,3): %6.4f" % flex.linear_correlation(
        x=b1, y=b3).coefficient()
    print >> log, "CC(2,3): %6.4f" % flex.linear_correlation(
        x=b2, y=b3).coefficient()
    ### D-function
    # From here on b1..b3 hold the volume-scaled values.
    b1 = maptbx.volume_scale_1d(map=b1, n_bins=10000).map_data()
    b2 = maptbx.volume_scale_1d(map=b2, n_bins=10000).map_data()
    b3 = maptbx.volume_scale_1d(map=b3, n_bins=10000).map_data()
    print >> log, "Peak CC:"
    print >> log, "CC(1,2): %6.4f" % flex.linear_correlation(
        x=b1, y=b2).coefficient()
    print >> log, "CC(1,3): %6.4f" % flex.linear_correlation(
        x=b1, y=b3).coefficient()
    print >> log, "CC(2,3): %6.4f" % flex.linear_correlation(
        x=b2, y=b3).coefficient()
    # NOTE(review): ``i / 100`` gives 0.91 .. 0.99 only under true division;
    # with Python 2 integer division these cutoffs would all be 0 -- confirm
    # the module enables ``from __future__ import division``.
    cutoffs = flex.double([i / 10. for i in range(1, 10)] +
                          [i / 100 for i in range(91, 100)])
    d12 = maptbx.discrepancy_function(map_1=b1, map_2=b2, cutoffs=cutoffs)
    d13 = maptbx.discrepancy_function(map_1=b1, map_2=b3, cutoffs=cutoffs)
    d23 = maptbx.discrepancy_function(map_1=b2, map_2=b3, cutoffs=cutoffs)
    print >> log, "q    D(1,2) D(1,3) D(2,3)"
    for c, d12_, d13_, d23_ in zip(cutoffs, d12, d13, d23):
        print >> log, "%4.2f %6.4f %6.4f %6.4f" % (c, d12_, d13_, d23_)
    ###
    if (params.debug):
        # Debug output: boxed maps plus the selected atoms in the box frame.
        box_1.write_ccp4_map(file_name="box_1_polder.ccp4")
        box_2.write_ccp4_map(file_name="box_2_polder.ccp4")
        box_3.write_ccp4_map(file_name="box_3_polder.ccp4")
        cpm_obj.pdb_hierarchy_selected.adopt_xray_structure(
            box_1.xray_structure_box)
        cpm_obj.pdb_hierarchy_selected.write_pdb_file(
            file_name="box_polder.pdb",
            crystal_symmetry=box_1.box_crystal_symmetry)
    #
    # NOTE(review): only the file NAME is assembled here; no write call is
    # visible in this function -- presumably compute_polder_map writes the
    # MTZ file under the same name; verify.
    polder_file_name = "polder_map_coeffs.mtz"
    if (params.output_file_name_prefix is not None):
        polder_file_name = params.output_file_name_prefix + "_" + polder_file_name
    #
    print >> log, '*' * 79
    print >> log, 'File %s was written.' % polder_file_name
    print >> log, "Finished."
    return True
Ejemplo n.º 4
0
def run(args, out=sys.stdout, validated=False):
    """Compare two maps and return the comparison statistics.

    Each of the two inputs is either a CCP4 map file or a reflection file
    whose map coefficients are selected with mtz_label_1 / mtz_label_2.
    The function prints overall statistics, the correlation of the input
    and of the volume-scaled maps, peak correlations, the discrepancy
    function and the map histograms to ``out``.

    Parameters:
      args: command-line arguments (file names and phil assignments).
      out: stream that receives all printed output.
      validated: when true, the ``validate_params`` call is skipped.

    Returns:
      dict with the keys 'map_files', 'map_statistics', 'cc_input_maps',
      'cc_quantile', 'cc_peaks', 'discrepancies' and 'map_histograms'.

    Raises:
      Sorry: in the one-map/one-mtz case when no gridding could be taken
        from a CCP4 map.
      SystemExit: via sys.exit() after printing the usage text when
        ``args`` is empty.
    """
    show_citation(out=out)
    if (len(args) == 0):
        master_phil.show(out=out)
        print('\nUsage: phenix.map_comparison <CCP4> <CCP4>\n',\
          '       phenix.map_comparison <CCP4> <MTZ> mtz_label_1=<label>\n',\
          '       phenix.map_comparison <MTZ 1> mtz_label_1=<label 1> <MTZ 2> mtz_label_2=<label 2>\n', file=out)
        sys.exit()

    # process arguments
    params = None
    # Order matters: it fixes the order of map_names / maps / inputs below.
    input_attributes = ['map_1', 'mtz_1', 'map_2', 'mtz_2']
    try:  # automatic parsing
        params = phil.process_command_line_with_files(
            args=args, master_phil=master_phil).work.extract()
    except Exception:  # map_file_def only handles one map phil
        # Fallback: separate existing files from phil assignments by hand
        # and assign the files to the input slots by their detected type.
        from libtbx.phil.command_line import argument_interpreter
        arg_int = argument_interpreter(master_phil=master_phil)
        command_line_args = list()
        map_files = list()
        for arg in args:
            if (os.path.isfile(arg)):
                map_files.append(arg)
            else:
                command_line_args.append(arg_int.process(arg))
        params = master_phil.fetch(sources=command_line_args).extract()

        # check if more files are necessary
        n_defined = 0
        for attribute in input_attributes:
            if (getattr(params.input, attribute) is not None):
                n_defined += 1

        # matches files to phil scope, stops once there is sufficient data
        for map_file in map_files:
            if (n_defined < 2):
                current_map = file_reader.any_file(map_file)
                # Files of any other type are skipped without a message.
                if (current_map.file_type == 'ccp4_map'):
                    n_defined += 1
                    if (params.input.map_1 is None):
                        params.input.map_1 = map_file
                    elif (params.input.map_2 is None):
                        params.input.map_2 = map_file
                elif (current_map.file_type == 'hkl'):
                    n_defined += 1
                    if (params.input.mtz_1 is None):
                        params.input.mtz_1 = map_file
                    elif (params.input.mtz_2 is None):
                        params.input.mtz_2 = map_file
            else:
                print('WARNING: only the first two files are used', file=out)
                break

    # validate arguments (GUI sets validated to true, no need to run again)
    assert (params is not None)
    if (not validated):
        validate_params(params)

    # ---------------------------------------------------------------------------
    # check if maps need to be generated from mtz
    # n_maps counts the CCP4 map inputs; maps / map_names hold every defined
    # input in input_attributes order.
    n_maps = 0
    maps = list()
    map_names = list()
    for attribute in input_attributes:
        filename = getattr(params.input, attribute)
        if (filename is not None):
            map_names.append(filename)
            current_map = file_reader.any_file(filename)
            maps.append(current_map)
            if (current_map.file_type == 'ccp4_map'):
                n_maps += 1

    # construct maps, if necessary
    crystal_gridding = None
    m1 = None
    m2 = None

    # 1 map, 1 mtz file
    if (n_maps == 1):
        # The gridding is taken from the CCP4 map so that the map computed
        # from the reflection file lands on the same grid.
        # NOTE(review): m1 is always the CCP4 map and m2 the mtz map, while
        # map_names follows input_attributes order; with mtz_1 + map_2 the
        # "Map 1"/"Map 2" headers printed later look swapped -- confirm.
        for current_map in maps:
            if (current_map.file_type == 'ccp4_map'):
                uc = current_map.file_object.unit_cell()
                sg_info = space_group_info(
                    current_map.file_object.space_group_number)
                n_real = current_map.file_object.unit_cell_grid
                crystal_gridding = maptbx.crystal_gridding(
                    uc, space_group_info=sg_info, pre_determined_n_real=n_real)
                m1 = current_map.file_object.map_data()
        if (crystal_gridding is not None):
            # Use the label of the first mtz slot that has both a file name
            # and a label; if neither has, label keeps mtz_label_2's value.
            label = None
            for attribute in [('mtz_1', 'mtz_label_1'),
                              ('mtz_2', 'mtz_label_2')]:
                filename = getattr(params.input, attribute[0])
                label = getattr(params.input, attribute[1])
                if ((filename is not None) and (label is not None)):
                    break
            # labels will match currently open mtz file
            for current_map in maps:
                if (current_map.file_type == 'hkl'):
                    m2 = miller.fft_map(
                        crystal_gridding=crystal_gridding,
                        fourier_coefficients=current_map.file_server.
                        get_miller_array(
                            label)).apply_sigma_scaling().real_map_unpadded()
        else:
            raise Sorry('Gridding is not defined.')

    # 2 mtz files
    elif (n_maps == 0):
        # Common grid: symmetry of the first file, finer of the two d_min.
        crystal_symmetry = get_crystal_symmetry(maps[0])
        d_min = min(get_d_min(maps[0]), get_d_min(maps[1]))
        crystal_gridding = maptbx.crystal_gridding(
            crystal_symmetry.unit_cell(),
            d_min=d_min,
            resolution_factor=params.options.resolution_factor,
            space_group_info=crystal_symmetry.space_group_info())
        m1 = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=maps[0].file_server.get_miller_array(
                params.input.mtz_label_1)).apply_sigma_scaling(
                ).real_map_unpadded()
        m2 = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=maps[1].file_server.get_miller_array(
                params.input.mtz_label_2)).apply_sigma_scaling(
                ).real_map_unpadded()

    # 2 maps
    else:
        # Map data are used as read; no regridding is done in this branch.
        m1 = maps[0].file_object.map_data()
        m2 = maps[1].file_object.map_data()

    # ---------------------------------------------------------------------------
    # analyze maps
    assert ((m1 is not None) and (m2 is not None))

    # show general statistics
    s1 = maptbx.more_statistics(m1)
    s2 = maptbx.more_statistics(m2)
    show_overall_statistics(out=out, s=s1, header="Map 1 (%s):" % map_names[0])
    show_overall_statistics(out=out, s=s2, header="Map 2 (%s):" % map_names[1])
    cc_input_maps = flex.linear_correlation(x=m1.as_1d(),
                                            y=m2.as_1d()).coefficient()
    print("CC, input maps: %6.4f" % cc_input_maps, file=out)

    # compute CCpeak
    cc_peaks = list()
    m1_he = maptbx.volume_scale(map=m1, n_bins=10000).map_data()
    m2_he = maptbx.volume_scale(map=m2, n_bins=10000).map_data()
    cc_quantile = flex.linear_correlation(x=m1_he.as_1d(),
                                          y=m2_he.as_1d()).coefficient()
    print("CC, quantile rank-scaled (histogram equalized) maps: %6.4f" % \
      cc_quantile, file=out)
    print("Peak correlation:", file=out)
    print("  cutoff  CCpeak", file=out)
    # Cutoffs 0.01 .. 0.89 in steps of 0.01, then 0.900 .. 0.999 in steps of
    # 0.001; note that the "%3.2f" format below prints only two decimals.
    cutoffs = [i / 100.
               for i in range(1, 90)] + [i / 1000 for i in range(900, 1000)]
    for cutoff in cutoffs:
        cc_peak = maptbx.cc_peak(map_1=m1_he, map_2=m2_he, cutoff=cutoff)
        print("  %3.2f   %7.4f" % (cutoff, cc_peak), file=out)
        cc_peaks.append((cutoff, cc_peak))

    # compute discrepancy function (D-function)
    discrepancies = list()
    # The same cutoffs, now as a flex.double for discrepancy_function.
    cutoffs = flex.double(cutoffs)
    df = maptbx.discrepancy_function(map_1=m1_he, map_2=m2_he, cutoffs=cutoffs)
    print("Discrepancy function:", file=out)
    print("  cutoff  D", file=out)
    for c, d in zip(cutoffs, df):
        print("  %3.2f   %7.4f" % (c, d), file=out)
        discrepancies.append((c, d))

    # compute and output histograms
    h1 = maptbx.histogram(map=m1, n_bins=10000)
    h2 = maptbx.histogram(map=m2, n_bins=10000)
    print("Map histograms:", file=out)
    # NOTE(review): this header reads params.input.map_1 / map_2, which stay
    # None for inputs given as mtz_1 / mtz_2; map_names holds the names that
    # were actually used -- confirm which is intended.
    print("Map 1 (%s)     Map 2 (%s)"%\
      (params.input.map_1,params.input.map_2), file=out)
    print("(map_value,cdf,frequency) <> (map_value,cdf,frequency)", file=out)
    for a1, c1, v1, a2, c2, v2 in zip(h1.arguments(), h1.c_values(),
                                      h1.values(), h2.arguments(),
                                      h2.c_values(), h2.values()):
        print("(%9.5f %9.5f %9.5f) <> (%9.5f %9.5f %9.5f)"%\
          (a1,c1,v1, a2,c2,v2), file=out)

    # store results
    s1_dict = create_statistics_dict(s=s1)
    s2_dict = create_statistics_dict(s=s2)
    results = dict()
    # Re-collect the defined input file names (same order as map_names).
    inputs = list()
    for attribute in input_attributes:
        filename = getattr(params.input, attribute)
        if (filename is not None):
            inputs.append(filename)
    assert (len(inputs) == 2)
    results['map_files'] = inputs
    results['map_statistics'] = (s1_dict, s2_dict)
    results['cc_input_maps'] = cc_input_maps
    results['cc_quantile'] = cc_quantile
    results['cc_peaks'] = cc_peaks
    results['discrepancies'] = discrepancies
    # TODO, verify h1,h2 are not dicts, e.g. .values is py2/3 compat. I assume it is here
    results['map_histograms'] = ((h1.arguments(), h1.c_values(), h1.values()),
                                 (h2.arguments(), h2.c_values(), h2.values()))

    return results
Ejemplo n.º 5
0
    def validate_polder_map(self,
                            selection_bool,
                            xray_structure_noligand,
                            mask_data_polder,
                            box_cushion=2.1):
        """
        Compute the statistics used to judge a polder map.

        Three boxed difference maps are built around the selected atoms:
          Map 1: calculated Fobs with ligand
          Map 2: calculated Fobs without ligand
          Map 3: real Fobs data
        They are compared on the grid points around the boxed sites, using a
        site radius of (box_cushion - 0.1).

        box_cushion is deliberately left at 2.1, i.e. a site radius of 2.0.
        Ideally the radii would depend on resolution, atom type and B factor
        of each atom, but 2.0 is a reasonable approximation for validation.
        Changing it changes the CC values and therefore the output messages
        (see mmtbx/programs/polder.py --> result_message), so modify this
        value with caution.

        Returns a group_args with the three boxes, the raw CCs (cc12, cc13,
        cc23), the CCs after volume scaling (cc*_peak), the discrepancy
        functions (d12, d13, d23) with their cutoffs, and the selected
        hierarchy (ph_selected) after it adopted the boxed structure.
        """
        n_bins = 10000

        def correlation(x, y):
            # Linear correlation coefficient of two aligned value arrays.
            return flex.linear_correlation(x=x, y=y).coefficient()

        def volume_scaled(values):
            # 1D volume scaling of the selected map values.
            return maptbx.volume_scale_1d(
                map=values, n_bins=n_bins).map_data()

        # Significance check: |Fmodel| with the ligand ...
        fmodel = mmtbx.f_model.manager(
            f_obs=self.f_obs,
            r_free_flags=self.r_free_flags,
            xray_structure=self.xray_structure)
        fmodel.update_all_scales(remove_outliers=False, fast=True)
        amplitudes_with_ligand = abs(fmodel.f_model())
        # ... and after swapping in the structure without the ligand.
        fmodel.update_xray_structure(
            xray_structure=xray_structure_noligand,
            update_f_calc=True,
            update_f_mask=True,
            force_update_f_mask=True)
        ## PVA: do we need it? fmodel.update_all_scales(remove_outliers=False)
        amplitudes_without_ligand = abs(fmodel.f_model())

        ph_selected = self.pdb_hierarchy.select(selection_bool)
        xrs_selected = ph_selected.extract_xray_structure(
            crystal_symmetry=self.cs)
        # f_calc and f_mask are shared by the three maps; only the
        # amplitudes passed as f_obs differ.
        f_calc = fmodel.f_obs().structure_factors_from_scatterers(
            xray_structure=xray_structure_noligand).f_calc()
        f_mask = fmodel.f_obs().structure_factors_from_map(
            map=mask_data_polder,
            use_scale=True,
            anomalous_flag=False,
            use_sg=False)

        def boxed_diff_map(amplitudes):
            return self.get_polder_diff_map(
                f_obs=amplitudes,
                r_free_flags=fmodel.r_free_flags(),
                f_calc=f_calc,
                f_mask=f_mask,
                xrs_selected=xrs_selected,
                box_cushion=box_cushion)

        box_1 = boxed_diff_map(amplitudes_with_ligand)
        box_2 = boxed_diff_map(amplitudes_without_ligand)
        box_3 = boxed_diff_map(fmodel.f_obs())

        # One selection, derived from box_1, applied to all three boxes.
        sites_in_box = box_1.xray_structure_box.sites_cart()
        near_sites = maptbx.grid_indices_around_sites(
            unit_cell=box_1.xray_structure_box.unit_cell(),
            fft_n_real=box_1.map_box.focus(),
            fft_m_real=box_1.map_box.all(),
            sites_cart=sites_in_box,
            site_radii=flex.double(sites_in_box.size(), box_cushion - 0.1))
        raw_1 = box_1.map_box.select(near_sites).as_1d()
        raw_2 = box_2.map_box.select(near_sites).as_1d()
        raw_3 = box_3.map_box.select(near_sites).as_1d()

        # Correlations of the raw map values.
        cc12 = correlation(raw_1, raw_2)
        cc13 = correlation(raw_1, raw_3)
        cc23 = correlation(raw_2, raw_3)

        # Correlations of the volume-scaled values.
        scaled_1 = volume_scaled(raw_1)
        scaled_2 = volume_scaled(raw_2)
        scaled_3 = volume_scaled(raw_3)
        cc12_peak = correlation(scaled_1, scaled_2)
        cc13_peak = correlation(scaled_1, scaled_3)
        cc23_peak = correlation(scaled_2, scaled_3)

        # D-function on the volume-scaled values.
        coarse_cutoffs = [i / 10. for i in range(1, 10)]
        fine_cutoffs = [i / 100 for i in range(91, 100)]
        cutoffs = flex.double(coarse_cutoffs + fine_cutoffs)
        d12 = maptbx.discrepancy_function(
            map_1=scaled_1, map_2=scaled_2, cutoffs=cutoffs)
        d13 = maptbx.discrepancy_function(
            map_1=scaled_1, map_2=scaled_3, cutoffs=cutoffs)
        d23 = maptbx.discrepancy_function(
            map_1=scaled_2, map_2=scaled_3, cutoffs=cutoffs)

        ph_selected.adopt_xray_structure(box_1.xray_structure_box)
        return group_args(
            box_1=box_1,
            box_2=box_2,
            box_3=box_3,
            cc12=cc12,
            cc13=cc13,
            cc23=cc23,
            cc12_peak=cc12_peak,
            cc13_peak=cc13_peak,
            cc23_peak=cc23_peak,
            d12=d12,
            d13=d13,
            d23=d23,
            cutoffs=cutoffs,
            ph_selected=ph_selected)
Ejemplo n.º 6
0
def run(args, out=sys.stdout, validated=False):
  """
  Compare two maps and report statistics, correlation coefficients, peak
  correlation, the discrepancy function and histograms.

  Each of the two inputs is either a CCP4 map or a reflection file from which
  a map is computed by FFT (using mtz_label_1 / mtz_label_2).

  Parameters
  ----------
  args : list of str
    Command-line arguments (file names and/or phil assignments). When empty,
    the master phil and a usage message are written to out and sys.exit()
    is called.
  out : file-like object
    Destination for everything this function prints.
  validated : bool
    When True, validate_params() is skipped.

  Returns
  -------
  dict
    Keys: 'map_files', 'map_statistics', 'cc_input_maps', 'cc_quantile',
    'cc_peaks', 'discrepancies' and 'map_histograms'.

  Raises
  ------
  Sorry
    In the one-map/one-reflection-file case, when no crystal gridding could
    be derived from a CCP4 map.
  """
  show_citation(out=out)
  if (len(args) == 0):
    master_phil.show(out=out)
    print >> out,\
      '\nUsage: phenix.map_comparison <CCP4> <CCP4>\n',\
      '       phenix.map_comparison <CCP4> <MTZ> mtz_label_1=<label>\n',\
      '       phenix.map_comparison <MTZ 1> mtz_label_1=<label 1> <MTZ 2> mtz_label_2=<label 2>\n'
    sys.exit()

  # process arguments
  params = None
  input_attributes = ['map_1', 'mtz_1', 'map_2', 'mtz_2']
  try: # automatic parsing
    params = phil.process_command_line_with_files(
      args=args, master_phil=master_phil).work.extract()
  except Exception: # map_file_def only handles one map phil
    from libtbx.phil.command_line import argument_interpreter
    arg_int = argument_interpreter(master_phil=master_phil)
    command_line_args = list()
    map_files = list()
    for arg in args:
      if (os.path.isfile(arg)):
        map_files.append(arg)
      else:
        command_line_args.append(arg_int.process(arg))
    params = master_phil.fetch(sources=command_line_args).extract()

    # check if more files are necessary
    n_defined = 0
    for attribute in input_attributes:
      if (getattr(params.input, attribute) is not None):
        n_defined += 1

    # matches files to phil scope, stops once there is sufficient data
    for map_file in map_files:
      if (n_defined < 2):
        current_map = file_reader.any_file(map_file)
        if (current_map.file_type == 'ccp4_map'):
          n_defined += 1
          if (params.input.map_1 is None):
            params.input.map_1 = map_file
          elif (params.input.map_2 is None):
            params.input.map_2 = map_file
        elif (current_map.file_type == 'hkl'):
          n_defined += 1
          if (params.input.mtz_1 is None):
            params.input.mtz_1 = map_file
          elif (params.input.mtz_2 is None):
            params.input.mtz_2 = map_file
      else:
        print >> out, 'WARNING: only the first two files are used'
        break

  # validate arguments (GUI sets validated to true, no need to run again)
  assert (params is not None)
  if (not validated):
    validate_params(params)

  # ---------------------------------------------------------------------------
  # check if maps need to be generated from mtz
  # maps / map_names follow the order of input_attributes
  n_maps = 0
  maps = list()
  map_names = list()
  for attribute in input_attributes:
    filename = getattr(params.input, attribute)
    if (filename is not None):
      map_names.append(filename)
      current_map = file_reader.any_file(filename)
      maps.append(current_map)
      if (current_map.file_type == 'ccp4_map'):
        n_maps += 1

  # construct maps, if necessary
  crystal_gridding = None
  m1 = None
  m2 = None

  # 1 map, 1 mtz file
  if (n_maps == 1):
    # the CCP4 map defines the gridding used for the FFT of the mtz data
    for current_map in maps:
      if (current_map.file_type == 'ccp4_map'):
        uc = current_map.file_object.unit_cell()
        sg_info = space_group_info(current_map.file_object.space_group_number)
        n_real = current_map.file_object.unit_cell_grid
        crystal_gridding = maptbx.crystal_gridding(
          uc, space_group_info=sg_info, pre_determined_n_real=n_real)
        m1 = current_map.file_object.map_data()
    if (crystal_gridding is not None):
      label = None
      for attribute in [('mtz_1', 'mtz_label_1'),
                        ('mtz_2', 'mtz_label_2')]:
        filename = getattr(params.input, attribute[0])
        label = getattr(params.input, attribute[1])
        if ( (filename is not None) and (label is not None) ):
          break
      # labels will match currently open mtz file
      for current_map in maps:
        if (current_map.file_type == 'hkl'):
          m2 = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=current_map.file_server.get_miller_array(
              label)).apply_sigma_scaling().real_map_unpadded()
      # At this point m1 is the CCP4 map and m2 the mtz-derived map. When the
      # mtz file comes first in input order (mtz_1 + map_2), swap them so that
      # m1/m2 line up with map_names and with results['map_files'].
      if (maps[0].file_type == 'hkl'):
        m1, m2 = m2, m1
    else:
      raise Sorry('Gridding is not defined.')

  # 2 mtz files
  elif (n_maps == 0):
    crystal_symmetry = get_crystal_symmetry(maps[0])
    d_min = min(get_d_min(maps[0]), get_d_min(maps[1]))
    crystal_gridding = maptbx.crystal_gridding(
      crystal_symmetry.unit_cell(), d_min=d_min,
      resolution_factor=params.options.resolution_factor,
      space_group_info=crystal_symmetry.space_group_info())
    m1 = miller.fft_map(
      crystal_gridding=crystal_gridding,
      fourier_coefficients=maps[0].file_server.get_miller_array(
        params.input.mtz_label_1)).apply_sigma_scaling().real_map_unpadded()
    m2 = miller.fft_map(
      crystal_gridding=crystal_gridding,
      fourier_coefficients=maps[1].file_server.get_miller_array(
        params.input.mtz_label_2)).apply_sigma_scaling().real_map_unpadded()

  # 2 maps
  else:
    m1 = maps[0].file_object.map_data()
    m2 = maps[1].file_object.map_data()

  # ---------------------------------------------------------------------------
  # analyze maps
  assert ( (m1 is not None) and (m2 is not None) )

  # show general statistics
  s1 = maptbx.more_statistics(m1)
  s2 = maptbx.more_statistics(m2)
  show_overall_statistics(out=out, s=s1, header="Map 1 (%s):"%map_names[0])
  show_overall_statistics(out=out, s=s2, header="Map 2 (%s):"%map_names[1])
  cc_input_maps = flex.linear_correlation(x = m1.as_1d(),
                                          y = m2.as_1d()).coefficient()
  print >> out, "CC, input maps: %6.4f" % cc_input_maps

  # compute CCpeak
  cc_peaks = list()
  m1_he = maptbx.volume_scale(map = m1,  n_bins = 10000).map_data()
  m2_he = maptbx.volume_scale(map = m2,  n_bins = 10000).map_data()
  cc_quantile = flex.linear_correlation(x = m1_he.as_1d(),
                                        y = m2_he.as_1d()).coefficient()
  print >> out, "CC, quantile rank-scaled (histogram equalized) maps: %6.4f" % \
    cc_quantile
  print >> out, "Peak correlation:"
  print >> out, "  cutoff  CCpeak"
  # float literals in both divisors: with plain Python 2 integer division
  # i/1000 would turn every cutoff in the 0.900-0.999 range into 0
  cutoffs = [i/100.  for i in range(1,90)]+ [i/1000. for i in range(900,1000)]
  for cutoff in cutoffs:
    cc_peak = maptbx.cc_peak(map_1=m1_he, map_2=m2_he, cutoff=cutoff)
    print >> out, "  %3.2f   %7.4f" % (cutoff, cc_peak)
    cc_peaks.append((cutoff, cc_peak))

  # compute discrepancy function (D-function)
  discrepancies = list()
  cutoffs = flex.double(cutoffs)
  df = maptbx.discrepancy_function(map_1=m1_he, map_2=m2_he, cutoffs=cutoffs)
  print >> out, "Discrepancy function:"
  print >> out, "  cutoff  D"
  for c, d in zip(cutoffs, df):
    print >> out, "  %3.2f   %7.4f" % (c,d)
    discrepancies.append((c, d))

  # compute and output histograms
  h1 = maptbx.histogram(map=m1, n_bins=10000)
  h2 = maptbx.histogram(map=m2, n_bins=10000)
  print >> out, "Map histograms:"
  # use map_names (as for the statistics headers above): params.input.map_1 /
  # map_2 are None whenever the corresponding input is an mtz file
  print >> out, "Map 1 (%s)     Map 2 (%s)"%\
    (map_names[0],map_names[1])
  print >> out, "(map_value,cdf,frequency) <> (map_value,cdf,frequency)"
  for a1,c1,v1, a2,c2,v2 in zip(h1.arguments(), h1.c_values(), h1.values(),
                                h2.arguments(), h2.c_values(), h2.values()):
    print >> out, "(%9.5f %9.5f %9.5f) <> (%9.5f %9.5f %9.5f)"%\
      (a1,c1,v1, a2,c2,v2)

  # store results
  s1_dict = create_statistics_dict(s=s1)
  s2_dict = create_statistics_dict(s=s2)
  results = dict()
  inputs = list()
  for attribute in input_attributes:
    filename = getattr(params.input,attribute)
    if (filename is not None):
      inputs.append(filename)
  assert (len(inputs) == 2)
  results['map_files'] = inputs
  results['map_statistics'] = (s1_dict, s2_dict)
  results['cc_input_maps'] = cc_input_maps
  results['cc_quantile'] = cc_quantile
  results['cc_peaks'] = cc_peaks
  results['discrepancies'] = discrepancies
  results['map_histograms'] = ( (h1.arguments(), h1.c_values(), h1.values()),
                                (h2.arguments(), h2.c_values(), h2.values()) )

  return results
Ejemplo n.º 7
0
def cmd_run(args, validated=False, out=sys.stdout):
  """
  Command-line entry point for phenix.polder.

  With no arguments the legend and the master parameters are written to out
  and the function returns None. Otherwise all output goes to a multi_out
  that writes both to out and to the file "polder.log" in the current
  directory, and the actual work is done by _cmd_run_with_log().

  Parameters
  ----------
  args : list of str
    Command-line arguments (file names and phil assignments).
  validated : bool
    When True, validate_params() is skipped.
  out : file-like object
    Stream that receives the usage text and a copy of the log.

  Returns
  -------
  True after a completed run, None when only the usage text was printed.
  """
  if (len(args) == 0):
    print >> out, "-"*79
    print >> out, "                               phenix.polder"
    print >> out, "-"*79
    print >> out, legend
    print >> out, "-"*79
    master_params.show(out=out)
    return
  log = multi_out()
  log.register("stdout", out)
  log_file_name = "polder.log"
  logfile = open(log_file_name, "w")
  log.register("logfile", logfile)
  try:
    return _cmd_run_with_log(args=args, validated=validated, log=log)
  finally:
    # make sure the log file is closed on success as well as on error
    logfile.close()

def _cmd_run_with_log(args, validated, log):
  """
  Body of cmd_run(): process the inputs, compute the polder map, run the
  significance check and write every message to log.

  Raises Sorry for a missing/ambiguous model file, a missing reflection file,
  or missing/mismatching crystal symmetry. Returns True when finished.
  """
  print >> log, "phenix.polder is running..."
  print >> log, "input parameters:\n", args
  inputs = mmtbx.utils.process_command_line_args(args = args,
    master_params = master_params)
  #inputs.params.show() #check
  params = inputs.params.extract()
  # check model file
  if len(inputs.pdb_file_names) == 0:
    if (params.model_file_name is None):
      raise Sorry("No model file found.")
  elif (len(inputs.pdb_file_names) == 1):
    params.model_file_name = inputs.pdb_file_names[0]
  else:
    raise Sorry("Only one model file should be given")
  # check reflection file
  reflection_files = inputs.reflection_files
  if (len(reflection_files) == 0):
    if (params.reflection_file_name is None):
      raise Sorry("No reflection file found.")
    else:
      hkl_in = file_reader.any_file(params.reflection_file_name,
        force_type="hkl")
      hkl_in.assert_file_type("hkl")
      reflection_files = [ hkl_in.file_object ]
  # crystal symmetry: taken from the processed inputs, otherwise extracted
  # from the model and reflection files (which must then agree)
  crystal_symmetry = inputs.crystal_symmetry
  if (crystal_symmetry is None):
    crystal_symmetries = []
    for f in [str(params.model_file_name), str(params.reflection_file_name)]:
      cs = crystal_symmetry_from_any.extract_from(f)
      if(cs is not None): crystal_symmetries.append(cs)
    if(len(crystal_symmetries) == 1): crystal_symmetry = crystal_symmetries[0]
    elif(len(crystal_symmetries) == 0):
      raise Sorry("No crystal symmetry found.")
    else:
      if(not crystal_symmetries[0].is_similar_symmetry(crystal_symmetries[1])):
        raise Sorry("Crystal symmetry mismatch between different files.")
      crystal_symmetry = crystal_symmetries[0]
  rfs = reflection_file_utils.reflection_file_server(
    crystal_symmetry = crystal_symmetry,
    force_symmetry   = True,
    reflection_files = reflection_files,
    err              = StringIO())
  parameters = mmtbx.utils.data_and_flags_master_params().extract()
  if (params.data_labels is not None):
    parameters.labels = params.data_labels
  if (params.r_free_flags_labels is not None):
    parameters.r_free_flags.label = params.r_free_flags_labels
  determined_data_and_flags = mmtbx.utils.determine_data_and_flags(
    reflection_file_server = rfs,
    parameters             = parameters,
    keep_going             = True,
    log                    = StringIO())
  f_obs = determined_data_and_flags.f_obs
  if (params.data_labels is None):
    params.data_labels = f_obs.info().label_string()
  if (params.reflection_file_name is None):
    params.reflection_file_name = parameters.file_name
  r_free_flags = determined_data_and_flags.r_free_flags
  assert f_obs is not None
  print >> log,  "Input data:"
  print >> log, "  Iobs or Fobs:", f_obs.info().labels
  if (r_free_flags is not None):
    print >> log, "  Free-R flags:", r_free_flags.info().labels
    params.r_free_flags_labels = r_free_flags.info().label_string()
  else:
    print >> log, "  Free-R flags: Not present"
  # Default output prefix = model file name up to its first ".". basename()
  # must be applied before the split: splitting the full path first gives an
  # empty or wrong prefix for paths such as "../model.pdb" or "/a.b/model.pdb".
  model_basename = os.path.basename(params.model_file_name).split(".")[0]
  if (len(model_basename) > 0 and
    params.output_file_name_prefix is None):
    params.output_file_name_prefix = model_basename
  # report through log (not bare stdout) so that the out argument is honoured
  # and the information also ends up in the log file
  print >> log, params.output_file_name_prefix
  new_params =  master_params.format(python_object=params)
  new_params.show(out=log)
  if (not validated):
    validate_params(params)
  pdb_input = iotbx.pdb.input(file_name = params.model_file_name)
  pdb_hierarchy = pdb_input.construct_hierarchy()
  xray_structure = pdb_hierarchy.extract_xray_structure(
    crystal_symmetry = crystal_symmetry)
  # DON'T USE:
  # xray_structure = pdb_input.xray_structure_simple()
  # atom order might be wrong
  mmtbx.utils.setup_scattering_dictionaries(
    scattering_table = params.scattering_table,
    xray_structure   = xray_structure,
    d_min            = f_obs.d_min())
  f_obs = f_obs.resolution_filter(
    d_min = params.high_resolution,
    d_max = params.low_resolution)
  if (r_free_flags is not None):
    r_free_flags = r_free_flags.resolution_filter(
      d_min = params.high_resolution,
      d_max = params.low_resolution)
  # Grab case that data are anomalous
  if (f_obs.anomalous_flag()):
    f_obs, r_free_flags = prepare_f_obs_and_flags(
      f_obs        = f_obs,
      r_free_flags = r_free_flags)
  cpm_obj = compute_polder_map(
    f_obs          = f_obs,
    r_free_flags   = r_free_flags,
    xray_structure = xray_structure,
    pdb_hierarchy  = pdb_hierarchy,
    params         = params,
    log            = log)
  # Significance check
  fmodel = mmtbx.f_model.manager(
    f_obs          = f_obs,
    r_free_flags   = r_free_flags,
    xray_structure = xray_structure)
  fmodel.update_all_scales(remove_outliers=False, fast=True)
  # |Fmodel| of the full model (map 1 below)
  f_obs_1 = abs(fmodel.f_model())
  fmodel.update_xray_structure(xray_structure=cpm_obj.xray_structure_noligand,
    update_f_calc=True, update_f_mask=True, force_update_f_mask=True)
  # PVA: do we need it? fmodel.update_all_scales(remove_outliers=False)
  # |Fmodel| of the model without the selected atoms (map 2 below)
  f_obs_2 = abs(fmodel.f_model())
  xrs_selected = cpm_obj.pdb_hierarchy_selected.extract_xray_structure(
    crystal_symmetry = f_obs.crystal_symmetry())
  f_calc = f_obs.structure_factors_from_scatterers(
    xray_structure = cpm_obj.xray_structure_noligand).f_calc()
  f_mask = f_obs.structure_factors_from_map(
    map            = cpm_obj.mask_polder,
    use_scale      = True,
    anomalous_flag = False,
    use_sg         = False)
  def get_polder_diff_map(f_obs):
    # mFo-DFc map for the given amplitudes, boxed around the selected atoms
    fmodel = mmtbx.f_model.manager(
      f_obs        = f_obs,
      r_free_flags = r_free_flags,
      f_calc       = f_calc,
      f_mask       = f_mask)
    fmodel.update_all_scales(remove_outliers=False)
    mc_diff = map_tools.electron_density_map(
      fmodel = fmodel).map_coefficients(
        map_type         = "mFo-DFc",
        isotropize       = True,
        fill_missing     = False)
    fft_map = miller.fft_map(
      crystal_gridding     = cpm_obj.crystal_gridding,
      fourier_coefficients = mc_diff)
    fft_map.apply_sigma_scaling()
    map_data = fft_map.real_map_unpadded()
    return mmtbx.utils.extract_box_around_model_and_map(
      xray_structure = xrs_selected,
      map_data       = map_data,
      box_cushion    = 2.1)
  box_1=get_polder_diff_map(f_obs = f_obs_1)
  box_2=get_polder_diff_map(f_obs = f_obs_2)
  box_3=get_polder_diff_map(f_obs = f_obs)
  # compare the three maps on the grid points around the selected sites
  sites_cart_box = box_1.xray_structure_box.sites_cart()
  sel = maptbx.grid_indices_around_sites(
    unit_cell  = box_1.xray_structure_box.unit_cell(),
    fft_n_real = box_1.map_box.focus(),
    fft_m_real = box_1.map_box.all(),
    sites_cart = sites_cart_box,
    site_radii = flex.double(sites_cart_box.size(), 2.0))
  b1 = box_1.map_box.select(sel).as_1d()
  b2 = box_2.map_box.select(sel).as_1d()
  b3 = box_3.map_box.select(sel).as_1d()
  print >> log, "Map 1: calculated Fobs with ligand"
  print >> log, "Map 2: calculated Fobs without ligand"
  print >> log, "Map 3: real Fobs data"
  print >>log, "CC(1,2): %6.4f"%flex.linear_correlation(x=b1,y=b2).coefficient()
  print >>log, "CC(1,3): %6.4f"%flex.linear_correlation(x=b1,y=b3).coefficient()
  print >>log, "CC(2,3): %6.4f"%flex.linear_correlation(x=b2,y=b3).coefficient()
  ### Peak CC (on volume-scaled maps)
  b1 = maptbx.volume_scale_1d(map=b1, n_bins=10000).map_data()
  b2 = maptbx.volume_scale_1d(map=b2, n_bins=10000).map_data()
  b3 = maptbx.volume_scale_1d(map=b3, n_bins=10000).map_data()
  print >> log, "Peak CC:"
  print >>log, "CC(1,2): %6.4f"%flex.linear_correlation(x=b1,y=b2).coefficient()
  print >>log, "CC(1,3): %6.4f"%flex.linear_correlation(x=b1,y=b3).coefficient()
  print >>log, "CC(2,3): %6.4f"%flex.linear_correlation(x=b2,y=b3).coefficient()
  ### D-function
  # float literals in both divisors: with plain Python 2 integer division
  # i/100 would turn the cutoffs 0.91 ... 0.99 into 0
  cutoffs = flex.double(
    [i/10. for i in range(1,10)]+[i/100. for i in range(91,100)])
  d12 = maptbx.discrepancy_function(map_1=b1, map_2=b2, cutoffs=cutoffs)
  d13 = maptbx.discrepancy_function(map_1=b1, map_2=b3, cutoffs=cutoffs)
  d23 = maptbx.discrepancy_function(map_1=b2, map_2=b3, cutoffs=cutoffs)
  print >> log, "q    D(1,2) D(1,3) D(2,3)"
  for c,d12_,d13_,d23_ in zip(cutoffs,d12,d13,d23):
    print >> log, "%4.2f %6.4f %6.4f %6.4f"%(c,d12_,d13_,d23_)
  ###
  if(params.debug):
    box_1.write_ccp4_map(file_name="box_1_polder.ccp4")
    box_2.write_ccp4_map(file_name="box_2_polder.ccp4")
    box_3.write_ccp4_map(file_name="box_3_polder.ccp4")
    cpm_obj.pdb_hierarchy_selected.adopt_xray_structure(
      box_1.xray_structure_box)
    cpm_obj.pdb_hierarchy_selected.write_pdb_file(file_name="box_polder.pdb",
      crystal_symmetry=box_1.box_crystal_symmetry)
  #
  print >> log, "Finished."
  return True
Ejemplo n.º 8
0
    def validate_polder_map(self):
        """
        Run the polder-map significance check and store the outcome in
        self.validation_results.

        Three difference-map boxes are obtained from
        self.get_polder_diff_map, differing only in the amplitudes passed
        as f_obs:
          Map 1: calculated Fobs with ligand (|Fmodel| for
                 self.xray_structure),
          Map 2: calculated Fobs without ligand (|Fmodel| after switching
                 to self.xray_structure_noligand),
          Map 3: real Fobs data.
        The maps are compared on the grid points around the selected sites
        (site radius 2.0, presumably in Angstrom) by linear correlation,
        by correlation after volume scaling, and by the discrepancy
        function.

        The result is a group_args with box_1..box_3, cc12/cc13/cc23,
        cc12_peak/cc13_peak/cc23_peak, d12/d13/d23, cutoffs and
        ph_selected (the selected hierarchy after adopting the box_1
        xray structure). Nothing is returned.
        """
        # Significance check
        fmodel = mmtbx.f_model.manager(f_obs=self.f_obs,
                                       r_free_flags=self.r_free_flags,
                                       xray_structure=self.xray_structure)
        fmodel.update_all_scales(remove_outliers=False, fast=True)
        f_obs_1 = abs(fmodel.f_model())
        fmodel.update_xray_structure(
            xray_structure=self.xray_structure_noligand,
            update_f_calc=True,
            update_f_mask=True,
            force_update_f_mask=True)
        ## PVA: do we need it? fmodel.update_all_scales(remove_outliers=False)
        f_obs_2 = abs(fmodel.f_model())
        pdb_hierarchy_selected = self.pdb_hierarchy.select(self.selection_bool)
        xrs_selected = pdb_hierarchy_selected.extract_xray_structure(
            crystal_symmetry=self.f_obs.crystal_symmetry())
        f_calc = fmodel.f_obs().structure_factors_from_scatterers(
            xray_structure=self.xray_structure_noligand).f_calc()
        f_mask = fmodel.f_obs().structure_factors_from_map(
            map=self.mask_data_polder,
            use_scale=True,
            anomalous_flag=False,
            use_sg=False)

        def diff_map_box(f_obs):
            # all three boxes share everything except the amplitudes
            return self.get_polder_diff_map(
                f_obs=f_obs,
                r_free_flags=fmodel.r_free_flags(),
                f_calc=f_calc,
                f_mask=f_mask,
                xrs_selected=xrs_selected)

        box_1 = diff_map_box(f_obs_1)
        box_2 = diff_map_box(f_obs_2)
        box_3 = diff_map_box(fmodel.f_obs())

        # restrict the comparison to grid points around the selected sites
        sites_cart_box = box_1.xray_structure_box.sites_cart()
        sel = maptbx.grid_indices_around_sites(
            unit_cell=box_1.xray_structure_box.unit_cell(),
            fft_n_real=box_1.map_box.focus(),
            fft_m_real=box_1.map_box.all(),
            sites_cart=sites_cart_box,
            site_radii=flex.double(sites_cart_box.size(), 2.0))
        b1 = box_1.map_box.select(sel).as_1d()
        b2 = box_2.map_box.select(sel).as_1d()
        b3 = box_3.map_box.select(sel).as_1d()
        # Map 1: calculated Fobs with ligand
        # Map 2: calculated Fobs without ligand
        # Map 3: real Fobs data
        cc12 = flex.linear_correlation(x=b1, y=b2).coefficient()
        cc13 = flex.linear_correlation(x=b1, y=b3).coefficient()
        cc23 = flex.linear_correlation(x=b2, y=b3).coefficient()
        #### Peak CC (correlation of the volume-scaled maps)
        b1 = maptbx.volume_scale_1d(map=b1, n_bins=10000).map_data()
        b2 = maptbx.volume_scale_1d(map=b2, n_bins=10000).map_data()
        b3 = maptbx.volume_scale_1d(map=b3, n_bins=10000).map_data()
        cc12_peak = flex.linear_correlation(x=b1, y=b2).coefficient()
        cc13_peak = flex.linear_correlation(x=b1, y=b3).coefficient()
        cc23_peak = flex.linear_correlation(x=b2, y=b3).coefficient()
        #### D-function
        # float literals in both divisors: with plain Python 2 integer
        # division i / 100 would turn the cutoffs 0.91 ... 0.99 into 0
        cutoffs = flex.double([i / 10. for i in range(1, 10)] +
                              [i / 100. for i in range(91, 100)])
        d12 = maptbx.discrepancy_function(map_1=b1, map_2=b2, cutoffs=cutoffs)
        d13 = maptbx.discrepancy_function(map_1=b1, map_2=b3, cutoffs=cutoffs)
        d23 = maptbx.discrepancy_function(map_1=b2, map_2=b3, cutoffs=cutoffs)
        pdb_hierarchy_selected.adopt_xray_structure(box_1.xray_structure_box)
        self.validation_results = group_args(
            box_1=box_1,
            box_2=box_2,
            box_3=box_3,
            cc12=cc12,
            cc13=cc13,
            cc23=cc23,
            cc12_peak=cc12_peak,
            cc13_peak=cc13_peak,
            cc23_peak=cc23_peak,
            d12=d12,
            d13=d13,
            d23=d23,
            cutoffs=cutoffs,
            ph_selected=pdb_hierarchy_selected)
Ejemplo n.º 9
0
def run(args, validated=False):
  """
  Compare two CCP4 maps and print statistics, correlation coefficients, peak
  correlation, the discrepancy function and histograms.

  args holds one or two command-line arguments (file names and/or phil
  assignments); with any other count a usage line is printed and sys.exit()
  is called. validate_params() is skipped when validated is True.

  Returns a dict with the keys 'map_files', 'map_statistics',
  'cc_input_maps', 'cc_quantile', 'cc_peaks', 'discrepancies' and
  'map_histograms'.
  """
  show_citation()
  n_args = len(args)
  if ( (n_args == 0) or (n_args > 2) ):
    print('\nUsage: phenix.map_comparison map_1=<first map> map_2=<second map>\n')
    sys.exit()

  # process arguments
  try: # automatic parsing
    processed = phil.process_command_line_with_files(
      args=args, master_phil=master_phil)
    params = processed.work.extract()
  except Exception: # map_file_def only handles one map phil
    from libtbx.phil.command_line import argument_interpreter
    interpreter = argument_interpreter(master_phil=master_phil)
    phil_sources = []
    file_args = []
    # existing files are treated as maps, everything else as phil input
    for arg in args:
      if (os.path.isfile(arg)):
        file_args.append(arg)
      else:
        phil_sources.append(interpreter.process(arg))
    params = master_phil.fetch(sources=phil_sources).extract()
    # first free slot is map_1, every further file goes to map_2
    for file_name in file_args:
      if (params.input.map_1 is not None):
        params.input.map_2 = file_name
      else:
        params.input.map_1 = file_name

  # validate arguments (GUI sets validated to true, no need to run again)
  if (not validated):
    validate_params(params)

  name_1 = params.input.map_1
  name_2 = params.input.map_2

  # ---------------------------------------------------------------------------
  # map 1
  ccp4_map_1 = iotbx.ccp4_map.map_reader(file_name=name_1)
  # symmetry objects are built but not used further in this function
  cs_1 = crystal.symmetry(ccp4_map_1.unit_cell().parameters(),
    ccp4_map_1.space_group_number)
  m1 = ccp4_map_1.map_data()

  # map 2
  ccp4_map_2 = iotbx.ccp4_map.map_reader(file_name=name_2)
  cs_2 = crystal.symmetry(ccp4_map_2.unit_cell().parameters(),
    ccp4_map_2.space_group_number)
  m2 = ccp4_map_2.map_data()

  # show general statistics
  s1 = maptbx.more_statistics(m1)
  s2 = maptbx.more_statistics(m2)
  show_overall_statistics(s=s1, header="Map 1 (%s):"%name_1)
  show_overall_statistics(s=s2, header="Map 2 (%s):"%name_2)
  cc_input_maps = flex.linear_correlation(
    x = m1.as_1d(), y = m2.as_1d()).coefficient()
  print("CC, input maps: %6.4f" % cc_input_maps)

  # compute CCpeak on the volume-scaled maps
  m1_he = maptbx.volume_scale(map = m1,  n_bins = 10000).map_data()
  m2_he = maptbx.volume_scale(map = m2,  n_bins = 10000).map_data()
  cc_quantile = flex.linear_correlation(
    x = m1_he.as_1d(), y = m2_he.as_1d()).coefficient()
  print("CC, quantile rank-scaled (histogram equalized) maps: %6.4f" %
        cc_quantile)
  print("Peak correlation:")
  print("  cutoff  CCpeak")
  peak_cutoffs = [i/100. for i in range(0,100,5)]+[0.99, 1.0]
  cc_peaks = []
  for cutoff in peak_cutoffs:
    cc_peak = maptbx.cc_peak(map_1=m1_he, map_2=m2_he, cutoff=cutoff)
    print("  %3.2f   %7.4f" % (cutoff, cc_peak))
    cc_peaks.append((cutoff, cc_peak))

  # compute discrepancy function (D-function)
  cutoffs = flex.double([i/20. for i in range(1,20)])
  df = maptbx.discrepancy_function(map_1=m1_he, map_2=m2_he, cutoffs=cutoffs)
  print("Discrepancy function:")
  print("  cutoff  D")
  discrepancies = []
  for pair in zip(cutoffs, df):
    print("  %3.2f   %7.4f" % pair)
    discrepancies.append(pair)

  # compute and output histograms
  h1 = maptbx.histogram(map=m1, n_bins=10000)
  h2 = maptbx.histogram(map=m2, n_bins=10000)
  hist_1 = (h1.arguments(), h1.c_values(), h1.values())
  hist_2 = (h2.arguments(), h2.c_values(), h2.values())
  print("Map histograms:")
  print("Map 1 (%s)     Map 2 (%s)"%(name_1,name_2))
  print("(map_value,cdf,frequency) <> (map_value,cdf,frequency)")
  for row in zip(*(hist_1 + hist_2)):
    print("(%9.5f %9.5f %9.5f) <> (%9.5f %9.5f %9.5f)"%row)

  # store results
  results = {
    'map_files': (name_1, name_2),
    'map_statistics': (create_statistics_dict(s1),
                       create_statistics_dict(s2)),
    'cc_input_maps': cc_input_maps,
    'cc_quantile': cc_quantile,
    'cc_peaks': cc_peaks,
    'discrepancies': discrepancies,
    'map_histograms': (hist_1, hist_2),
  }

  return results