Example #1
0
  def __init__ (self, fmodel, pdb_hierarchy, cc_min=0.8,
                molprobity_map_params=None) :
    """
    Initialise the validation object and run the per-residue real-space
    correlation calculation.

    Parameters:
      fmodel: forwarded to real_space_correlation.simple().
      pdb_hierarchy: forwarded to real_space_correlation.simple().
      cc_min: correlation threshold; not referenced in the visible part of
        this method.
      molprobity_map_params: optional parameter object.  When it is not None,
        its map_file_name, map_coefficients_file_name and
        map_coefficients_label attributes are copied onto the correlation
        parameters.

    Any exception raised while preparing the parameters or running the
    calculation propagates to the caller unchanged.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    # NOTE(review): aa_codes and rsc are not read within the visible body.
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    if (molprobity_map_params is not None):
      rsc_params.map_file_name = molprobity_map_params.map_file_name
      rsc_params.map_coefficients_file_name = \
        molprobity_map_params.map_coefficients_file_name
      rsc_params.map_coefficients_label = \
        molprobity_map_params.map_coefficients_label
    # No try/except here: a handler that only re-raises adds nothing, so
    # errors are simply allowed to propagate with their full traceback.
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
  def __init__ (self, fmodel, pdb_hierarchy, cc_min=0.8,
                molprobity_map_params=None) :
    """
    Initialise the validation object and run the per-residue real-space
    correlation calculation.

    Parameters:
      fmodel: forwarded to real_space_correlation.simple().
      pdb_hierarchy: forwarded to real_space_correlation.simple().
      cc_min: correlation threshold; not referenced in the visible part of
        this method.
      molprobity_map_params: optional parameter object.  When it is not None,
        its map_file_name, map_coefficients_file_name and
        map_coefficients_label attributes are copied onto the correlation
        parameters.

    Any exception raised while preparing the parameters or running the
    calculation propagates to the caller with its original traceback.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    # NOTE(review): aa_codes and rsc are not read within the visible body.
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    if (molprobity_map_params is not None):
      rsc_params.map_file_name = molprobity_map_params.map_file_name
      rsc_params.map_coefficients_file_name = \
        molprobity_map_params.map_coefficients_file_name
      rsc_params.map_coefficients_label = \
        molprobity_map_params.map_coefficients_label
    # Deliberately not wrapped in a catch-and-re-raise handler: re-raising a
    # caught exception object by name would start a new traceback at the
    # handler under Python 2 and hide where the failure really happened.
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
Example #3
0
  def __init__ (self, fmodel, pdb_hierarchy, cc_min=0.8) :
    """
    Initialise the validation object and run the per-residue real-space
    correlation calculation.

    Parameters:
      fmodel: forwarded to real_space_correlation.simple().
      pdb_hierarchy: forwarded to real_space_correlation.simple().
      cc_min: correlation threshold; not referenced in the visible part of
        this method.

    Raises:
      RuntimeError: if preparing the parameters or running the calculation
        fails; the message is "Error: " followed by the text of the
        underlying exception.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    # NOTE(review): aa_codes and rsc are not read within the visible body.
    aa_codes = one_letter_given_three_letter.keys()

    try :
      rsc_params = real_space_correlation.master_params().extract()
      rsc_params.detail = "residue"
      rsc_params.map_1.fill_missing_reflections = False
      rsc_params.map_2.fill_missing_reflections = False
      rsc = real_space_correlation.simple(
        fmodel=fmodel,
        pdb_hierarchy=pdb_hierarchy,
        params=rsc_params,
        log=null_out())
    except Exception as e :
      # A bare string cannot be raised (string exceptions are rejected with a
      # TypeError), so wrap the message in a real exception class.
      raise RuntimeError("Error: %s" % str(e))
def exercise_1():
  """
  Exercise mmtbx.real_space_correlation against the 1yjp model and data from
  phenix_regression.

  Returns False (after printing a notice) when either regression file cannot
  be located, and True when every assertion passes.
  """
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/1yjp_h.pdb",
    test=os.path.isfile)
  mtz_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/reflection_files/1yjp.mtz",
    test=os.path.isfile)
  if (pdb_file is None) or (mtz_file is None) :
    # A single parenthesised argument prints the same text whether print is
    # a statement or a function.
    print("phenix_regression not found, skipping test")
    return False
  pdb_in = file_reader.any_file(pdb_file)
  hierarchy = pdb_in.file_object.hierarchy
  hierarchy.atoms().reset_i_seq()
  xrs = pdb_in.file_object.xray_structure_simple()
  mtz_in = file_reader.any_file(mtz_file)
  f_obs = mtz_in.file_server.miller_arrays[0]
  r_free = mtz_in.file_server.miller_arrays[1]
  # Convert the flag array to booleans: True where the stored value is 1.
  r_free = r_free.customized_copy(data=(r_free.data()==1))
  fmodel = mmtbx.utils.fmodel_simple(
    f_obs=f_obs,
    r_free_flags=r_free,
    xray_structures=[xrs],
    scattering_table="n_gaussian")
  # Correlation for the whole hierarchy, computed directly from fmodel.
  map_stats = real_space_correlation.map_statistics_for_fragment(
    fragment=hierarchy,
    fmodel=fmodel)
  assert approx_equal(map_stats.cc, 0.960, eps=0.01)
  # Same quantity from two explicitly computed maps with every atom selected;
  # it must agree with the value obtained above.
  edm = fmodel.electron_density_map()
  map1_coeffs = edm.map_coefficients("2mFo-DFc")
  map1 = map1_coeffs.fft_map(
    resolution_factor=0.25).apply_sigma_scaling().real_map()
  map2_coeffs = edm.map_coefficients("Fmodel")
  map2 = map2_coeffs.fft_map(
    resolution_factor=0.25).apply_sigma_scaling().real_map()
  map_stats2 = real_space_correlation.map_statistics_for_atom_selection(
    atom_selection=flex.bool(xrs.sites_cart().size(), True),
    map1=map1,
    map2=map2,
    xray_structure=xrs)
  assert approx_equal(map_stats2.cc, map_stats.cc, 0.01)
  # XXX other code outside cctbx depends on the current API - do not simply
  # change the test if this breaks!
  results = real_space_correlation.simple(
    fmodel=fmodel,
    pdb_hierarchy=hierarchy,
    log=null_out())
  assert isinstance(results, list)
  assert isinstance(results[0], group_args)
  assert (results[0].n_atoms == 1)
  assert (results[0].id_str == " A   GLY    1    N  ")
  return True
def exercise_1():
    """
    Exercise mmtbx.real_space_correlation against the 1yjp model and data
    from phenix_regression.

    Returns False (after printing a notice) when either regression file
    cannot be located, and True when every assertion passes.
    """
    pdb_file = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/1yjp_h.pdb", test=os.path.isfile)
    mtz_file = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/reflection_files/1yjp.mtz",
        test=os.path.isfile)
    if (pdb_file is None) or (mtz_file is None):
        # A single parenthesised argument prints the same text whether print
        # is a statement or a function.
        print("phenix_regression not found, skipping test")
        return False
    pdb_in = file_reader.any_file(pdb_file)
    hierarchy = pdb_in.file_object.hierarchy
    hierarchy.atoms().reset_i_seq()
    xrs = pdb_in.file_object.xray_structure_simple()
    mtz_in = file_reader.any_file(mtz_file)
    f_obs = mtz_in.file_server.miller_arrays[0]
    r_free = mtz_in.file_server.miller_arrays[1]
    # Convert the flag array to booleans: True where the stored value is 1.
    r_free = r_free.customized_copy(data=(r_free.data() == 1))
    fmodel = mmtbx.utils.fmodel_simple(f_obs=f_obs,
                                       r_free_flags=r_free,
                                       xray_structures=[xrs],
                                       scattering_table="n_gaussian")
    # Correlation for the whole hierarchy, computed directly from fmodel.
    map_stats = real_space_correlation.map_statistics_for_fragment(
        fragment=hierarchy, fmodel=fmodel)
    assert approx_equal(map_stats.cc, 0.960, eps=0.01)
    # Same quantity from two explicitly computed maps with every atom
    # selected; it must agree with the value obtained above.
    edm = fmodel.electron_density_map()
    map1_coeffs = edm.map_coefficients("2mFo-DFc")
    map1 = map1_coeffs.fft_map(
        resolution_factor=0.25).apply_sigma_scaling().real_map()
    map2_coeffs = edm.map_coefficients("Fmodel")
    map2 = map2_coeffs.fft_map(
        resolution_factor=0.25).apply_sigma_scaling().real_map()
    map_stats2 = real_space_correlation.map_statistics_for_atom_selection(
        atom_selection=flex.bool(xrs.sites_cart().size(), True),
        map1=map1,
        map2=map2,
        xray_structure=xrs)
    assert approx_equal(map_stats2.cc, map_stats.cc, 0.01)
    # XXX other code outside cctbx depends on the current API - do not simply
    # change the test if this breaks!
    # simple() is unpacked here as an (overall value, per-atom list) pair.
    overall_cc, results = real_space_correlation.simple(
        fmodel=fmodel, pdb_hierarchy=hierarchy, log=null_out())
    assert isinstance(overall_cc, float)
    assert isinstance(results, list)
    assert isinstance(results[0], group_args)
    assert (results[0].n_atoms == 1)
    assert (results[0].id_str == " A   GLY    1    N  ")
    return True
Example #6
0
def run(args,
        command_name             = "mmtbx.model_vs_data",
        show_geometry_statistics = True,
        model_size_max_atoms     = 80000,
        data_size_max_reflections= 1000000,
        unit_cell_max_dimension  = 800.,
        return_fmodel_and_pdb    = False,
        out                      = None,
        log                      = sys.stdout):
  # Compare a model (PDB file(s)) against reflection data and collect the
  # resulting statistics in an mvd object, which is returned.
  #
  # Arguments:
  #   args: command-line style argument list.  An empty list or ["--help"]
  #     prints the help text and the default parameters to `log` and returns
  #     None.
  #   command_name: not referenced in the body of this function.
  #   show_geometry_statistics: forwarded to show_geometry(); its negation is
  #     passed as ignore_unknown_nonbonded_energy_types when reading the PDB
  #     file.
  #   model_size_max_atoms: not referenced in the body of this function.
  #   data_size_max_reflections, unit_cell_max_dimension: limits enforced
  #     only when params.ignore_giant_models_and_datasets is set.
  #   return_fmodel_and_pdb: when true, the processed PDB file and the fmodel
  #     are attached to the returned object.
  #   out: stream given to mvd_obj.show() and to the CC* statistics output.
  #   log: stream for the help text, PDB processing, the statistics in
  #     resolution bins and the real-space correlation report.
  #
  # Raises Sorry when no reflection file, crystal symmetry or PDB file is
  # found, or when one of the size limits above is exceeded.
  import mmtbx.f_model_info
  if(len(args)==0) or (args == ["--help"]) :
    print >> log, msg
    defaults(log=log, silent=False)
    return
  parsed = defaults(log=log, silent=True)
  #
  mvd_obj = mvd()
  #
  processed_args = utils.process_command_line_args(args = args,
    log = log, master_params = parsed)
  params = processed_args.params.extract()
  #
  reflection_files = processed_args.reflection_files
  if(len(reflection_files) == 0):
    raise Sorry("No reflection file found.")
  crystal_symmetry = processed_args.crystal_symmetry
  if(crystal_symmetry is None):
    raise Sorry("No crystal symmetry found.")
  if(len(processed_args.pdb_file_names) == 0):
    raise Sorry("No PDB file found.")
  pdb_file_names = processed_args.pdb_file_names
  #
  # Select the observed data and R-free flags, applying any user-supplied
  # labels and high-resolution cutoff on top of the default parameters.
  rfs = reflection_file_server(
    crystal_symmetry = crystal_symmetry,
    reflection_files = reflection_files)
  parameters = utils.data_and_flags_master_params().extract()
  if(params.f_obs_label is not None):
    parameters.labels = params.f_obs_label
  if(params.r_free_flags_label is not None):
    parameters.r_free_flags.label = params.r_free_flags_label
  if (params.high_resolution is not None) :
    parameters.high_resolution = params.high_resolution
  # The log of this step is written to a throw-away StringIO.
  determine_data_and_flags_result = utils.determine_data_and_flags(
    reflection_file_server  = rfs,
    parameters              = parameters,
    data_parameter_scope    = "refinement.input.xray_data",
    flags_parameter_scope   = "refinement.input.xray_data.r_free_flags",
    data_description        = "X-ray data",
    keep_going              = True,
    log                     = StringIO())
  f_obs = determine_data_and_flags_result.f_obs
  number_of_reflections = f_obs.indices().size()
  if(params.ignore_giant_models_and_datasets and
     number_of_reflections > data_size_max_reflections):
    raise Sorry("Too many reflections: %d"%number_of_reflections)
  #
  max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
  if(params.ignore_giant_models_and_datasets and
     max_unit_cell_dimension > unit_cell_max_dimension):
    raise Sorry("Too large unit cell (max dimension): %s"%
      str(max_unit_cell_dimension))
  #
  r_free_flags = determine_data_and_flags_result.r_free_flags
  test_flag_value = determine_data_and_flags_result.test_flag_value
  # Without R-free flags, fall back to an all-False flag array.
  if(r_free_flags is None):
    r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))
    test_flag_value=None
  #
  mmtbx_pdb_file = mmtbx.utils.pdb_file(
    pdb_file_names        = pdb_file_names,
    cif_objects           = processed_args.cif_objects,
    crystal_symmetry      = crystal_symmetry,
    use_neutron_distances = (params.scattering_table=="neutron"),
    ignore_unknown_nonbonded_energy_types = not show_geometry_statistics,
    log                   = log)
  mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels = False)
  processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
  pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
  pdb_inp = mmtbx_pdb_file.pdb_inp
  #
  # just to avoid going any further with bad PDB file....
  pdb_inp.xray_structures_simple()
  #
  acp = processed_pdb_file.all_chain_proxies
  atom_selections = group_args(
    all           = acp.selection(string = "all"),
    macromolecule = acp.selection(string = "protein or dna or rna"),
    solvent       = acp.selection(string = "water"), # XXX single_atom_residue
    ligand        = acp.selection(string = "not (protein or dna or rna or water)"),
    backbone      = acp.selection(string = "backbone"),
    sidechain     = acp.selection(string = "sidechain"))
  #
  # The experiment type recorded in the PDB input overrides the requested
  # scattering table when it mentions NEUTRON.
  scattering_table = params.scattering_table
  exptl_method = pdb_inp.get_experiment_type()
  if (exptl_method is not None) and ("NEUTRON" in exptl_method) :
    scattering_table = "neutron"
  xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
    processed_pdb_file = processed_pdb_file,
    scattering_table   = scattering_table,
    d_min              = f_obs.d_min())
  xray_structures = xsfppf.xray_structures
  # Disabled branch: the constant condition means this never executes.
  if(0): #XXX normalize occupancies if all models have occ=1 so the total=1
    n_models = len(xray_structures)
    for xrs in xray_structures:
      occ = xrs.scatterers().extract_occupancies()
      occ = occ/n_models
      xrs.set_occupancies(occ)
  model_selections = xsfppf.model_selections
  mvd_obj.collect(crystal = group_args(
    uc       = f_obs.unit_cell(),
    sg       = f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
    n_sym_op = f_obs.crystal_symmetry().space_group_info().type().group().order_z(),
    uc_vol   = f_obs.unit_cell().volume()))
  #
  hierarchy = pdb_inp.construct_hierarchy()
  pdb_atoms = hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  #
  # Extract TLS
  # (the None assigned here is overwritten two statements below)
  pdb_tls = None
  pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
  pdb_tls = group_args(pdb_inp_tls           = pdb_inp_tls,
                       tls_selections        = [],
                       tls_selection_strings = [])
  # XXX no TLS + multiple models
  if(pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None and
     len(xray_structures)==1):
    pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
      pdb_inp_tls       = pdb_inp_tls,
      all_chain_proxies = mmtbx_pdb_file.processed_pdb_file.all_chain_proxies,
      xray_structure    = xsfppf.xray_structure_all)
    # Replace the structure only when every TLS group in the header produced
    # a selection.
    if(len(pdb_tls.tls_selections)==len(pdb_inp_tls.tls_params) and
       len(pdb_inp_tls.tls_params) > 0):
      xray_structures = [utils.extract_tls_and_u_total_from_pdb(
        f_obs          = f_obs,
        r_free_flags   = r_free_flags,
        xray_structure = xray_structures[0], # XXX no TLS + multiple models
        tls_selections = pdb_tls.tls_selections,
        tls_groups     = pdb_inp_tls.tls_params)]
  ###########################
  geometry_statistics = show_geometry(
    xray_structures          = xray_structures,
    processed_pdb_file       = processed_pdb_file,
    scattering_table         = scattering_table,
    hierarchy                = hierarchy,
    model_selections         = model_selections,
    show_geometry_statistics = show_geometry_statistics,
    mvd_obj                  = mvd_obj,
    atom_selections          = atom_selections)
  ###########################
  mp = mmtbx.masks.mask_master_params.extract()
  # The label string is captured before sorting and passed to show_data()
  # further down.
  f_obs_labels = f_obs.info().label_string()
  f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
  r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
  fmodel = utils.fmodel_simple(
    xray_structures     = xray_structures,
    scattering_table    = scattering_table,
    mask_params         = mp,
    f_obs               = f_obs,
    r_free_flags        = r_free_flags,
    skip_twin_detection = params.skip_twin_detection)
  # Difference between the number of input reflections and the number held
  # by fmodel.
  n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
  mvd_obj.collect(model_vs_data = show_model_vs_data(fmodel))
  # Extract information from PDB file header and output (if any)
  pub_r_work       = None
  pub_r_free       = None
  pub_high         = None
  pub_low          = None
  pub_sigma        = None
  pub_program_name = None
  pub_solv_cont    = None
  pub_matthews     = None
  published_results = pdb_inp.get_r_rfree_sigma(file_name=pdb_file_names[0])
  if(published_results is not None):
    pub_r_work = published_results.r_work
    pub_r_free = published_results.r_free
    pub_high   = published_results.high
    pub_low    = published_results.low
    pub_sigma  = published_results.sigma
  pub_program_name = pdb_inp.get_program_name()
  pub_solv_cont    = pdb_inp.get_solvent_content()
  pub_matthews     = pdb_inp.get_matthews_coeff()
  mvd_obj.collect(pdb_header = group_args(
    program_name    = pub_program_name,
    year            = pdb_inp.extract_header_year(),
    r_work          = pub_r_work,
    r_free          = pub_r_free,
    high_resolution = pub_high,
    low_resolution  = pub_low,
    sigma_cutoff    = pub_sigma,
    matthews_coeff  = pub_matthews,
    solvent_cont    = pub_solv_cont,
    tls             = pdb_tls,
    exptl_method    = exptl_method))
  #
  # Recompute R-factors using published cutoffs
  # tmp_sel starts all-True and is narrowed by the published sigma and
  # resolution limits; the resolution limits are applied only when they
  # differ from the data by more than 0.03.
  fmodel_cut = fmodel
  tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
  if(pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
    tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas()*pub_sigma
  if(pub_high is not None and abs(pub_high-fmodel.f_obs().d_min()) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
  if(pub_low is not None and abs(pub_low-fmodel.f_obs().d_max_min()[0]) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
  # A second fmodel is built only when the cutoffs remove some, but not all,
  # reflections; otherwise fmodel_cut stays the same object as fmodel.
  if(tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
    fmodel_cut = utils.fmodel_simple(
      xray_structures     = xray_structures,
      scattering_table    = scattering_table,
      f_obs               = fmodel.f_obs().select(tmp_sel),
      r_free_flags        = fmodel.r_free_flags().select(tmp_sel),
      skip_twin_detection = params.skip_twin_detection)
  mvd_obj.collect(misc = group_args(
    r_work_cutoff = fmodel_cut.r_work(),
    r_free_cutoff = fmodel_cut.r_free(),
    n_refl_cutoff = fmodel_cut.f_obs().data().size()))
  mvd_obj.collect(data =
    show_data(fmodel          = fmodel,
              n_outl          = n_outl,
              test_flag_value = test_flag_value,
              f_obs_labels    = f_obs_labels,
              fmodel_cut      = fmodel_cut))
  # map statistics
  if(len(xray_structures)==1): # XXX no multi-model support yet
    mvd_obj.collect(maps = maps(fmodel = fmodel, mvd_obj = mvd_obj))
  # CC* and friends
  cc_star_stats = None
  if (params.unmerged_data is not None) :
    import mmtbx.validation.experimental
    import mmtbx.command_line
    # NOTE: this rebinds f_obs to the Bijvoet-averaged data held by fmodel.
    f_obs = fmodel.f_obs().average_bijvoet_mates()
    unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
      f_obs=f_obs,
      file_name=params.unmerged_data,
      data_labels=params.unmerged_labels,
      log=null_out())
    cc_star_stats = mmtbx.validation.experimental.merging_and_model_statistics(
      f_model=fmodel.f_model().average_bijvoet_mates(),
      f_obs=f_obs,
      r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
      unmerged_i_obs=unmerged_i_obs,
      n_bins=params.n_bins)
  mvd_obj.show(log=out)
  if (cc_star_stats is not None) :
    cc_star_stats.show_model_vs_data(out=out, prefix="  ")
  if return_fmodel_and_pdb :
    mvd_obj.pdb_file = processed_pdb_file
    mvd_obj.fmodel = fmodel
  # Write one MTZ file of map coefficients per requested map type.  The file
  # name prefix is the basename of the first reflection file up to its first
  # ".".
  if(len(params.map) > 0):
    for map_name_string in params.map:
      map_type_obj = mmtbx.map_names(map_name_string = map_name_string)
      map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
        mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
      maps_obj = mmtbx.maps.compute_map_coefficients(fmodel = fmodel_cut, params =
        map_params.map_coefficients)
      fn = os.path.basename(processed_args.reflection_file_names[0])
      if(fn.count(".")):
        prefix = fn[:fn.index(".")]
      else: prefix= fn
      file_name = prefix+"_%s_map_coeffs.mtz"%map_type_obj.format()
      maps_obj.write_mtz_file(file_name = file_name)
  # statistics in bins
  if(not fmodel.twin):
    print >> log, "Statistics in resolution bins:"
    mmtbx.f_model_info.r_work_and_completeness_in_resolution_bins(
      fmodel = fmodel, out = log, prefix="  ")
  # report map cc
  if(params.comprehensive and not fmodel_cut.twin and
     fmodel_cut.xray_structure is not None):
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.scattering_table = scattering_table
    real_space_correlation.simple(
       fmodel        = fmodel_cut,
       pdb_hierarchy = hierarchy,
       params        = rsc_params,
       log           = log,
       show_results  = True)
  #
  # The pickle file name joins, with "_", the de-duplicated basenames of all
  # PDB and reflection files, each cut at its first ".".
  if(params.dump_result_object_as_pickle):
    output_prefixes = []
    for op in processed_args.pdb_file_names+processed_args.reflection_file_names:
      op = os.path.basename(op)
      # A name without "." makes index() raise; the name is then kept whole.
      try: op = op[:op.index(".")]
      except Exception: pass
      if(not op in output_prefixes): output_prefixes.append(op)
    output_prefix = "_".join(output_prefixes)
    easy_pickle.dump("%s.pickle"%output_prefix, mvd_obj)
  return mvd_obj
Example #7
0
    def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
        """
        Compute per-residue real-space correlations and store them as
        validation results.

        Parameters:
          model: object providing get_hierarchy() and crystal_symmetry().
          fmodel: forwarded to real_space_correlation.simple() when no map
            file is supplied.
          cc_min: residues whose correlation is below this value are flagged
            as outliers.
          molprobity_map_params: optional parameter object.  Its
            map_coefficients_file_name and map_coefficients_label are copied
            onto the correlation parameters; when its map_file_name is not
            None the map-based calculation (mmtbx.maps.map_model_cc) is used
            with its d_min as the resolution.

        Attributes set:
          overall_rsc: (cc_mask, cc_volume, cc_peaks) for the map-based
            calculation, otherwise the first value returned by
            real_space_correlation.simple().
          fsc: result of get_fsc() for the map-based calculation, else None.
          everything, protein, other, water: lists of residue_real_space
            results; water is left empty here.
          results: the same list object as protein.

        Exceptions raised by either calculation propagate unchanged.
        """
        from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
        from mmtbx import real_space_correlation

        validation.__init__(self)

        pdb_hierarchy = model.get_hierarchy()
        # NOTE(review): the value is not used below; the call is kept in case
        # it has side effects - confirm and remove if it is a plain getter.
        crystal_symmetry = model.crystal_symmetry()

        # arrays for different components
        self.everything = list()
        self.protein = list()
        self.other = list()
        self.water = list()

        # redo real_space_correlation.simple to use map objects instead of
        # filenames
        self.overall_rsc = None
        rsc = None
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail = "residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if (molprobity_map_params is not None):
            rsc_params.map_coefficients_file_name = \
              molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
              molprobity_map_params.map_coefficients_label
            if (molprobity_map_params.map_file_name is not None):
                use_maps = True
        # use mmtbx/command_line/map_model_cc.py for maps
        self.fsc = None
        if (use_maps):
            from iotbx import map_and_model
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(
                molprobity_map_params.map_file_name).file_object

            # check that model crystal symmetry matches map crystal symmetry
            mmi = map_and_model.input(map_data=map_object.map_data(),
                                      model=model)

            rsc_object = map_model_cc.map_model_cc(
                mmi.map_data(),
                mmi.model().get_hierarchy(), mmi.crystal_symmetry(),
                params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            rsc = rsc_object.get_results()
            self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)

            self.fsc = get_fsc(mmi.map_data(), mmi.model(),
                               params.map_model_cc)
            self.fsc.atom_radius = rsc.atom_radius
            # From here on rsc is the per-residue list only.
            rsc = rsc.cc_per_residue
        # mmtbx/real_space_correlation.py for X-ray/neutron data and map
        # coefficients
        else:
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())

        assert ((self.overall_rsc is not None) and (rsc is not None))
        for result_ in rsc:
            if (use_maps):
                # new rsc calculation (mmtbx/maps/model_map_cc.py)
                result = residue_real_space(chain_id=result_.chain_id,
                                            resname=result_.resname,
                                            resseq=result_.resseq,
                                            icode=result_.icode,
                                            altloc="",
                                            score=result_.cc,
                                            b_iso=result_.b_iso_mean,
                                            occupancy=result_.occ_mean,
                                            outlier=result_.cc < cc_min,
                                            xyz=result_.xyz_mean)
            else:
                # old rsc calculation (mmtbx/maps/real_space_correlation.py)
                result = residue_real_space(
                    chain_id=result_.chain_id,
                    resname=result_.residue.resname,
                    resseq=result_.residue.resseq,
                    icode=result_.residue.icode,
                    altloc="",
                    score=result_.cc,
                    b_iso=result_.b,
                    occupancy=result_.occupancy,
                    fmodel=result_.map_value_1,
                    two_fofc=result_.map_value_2,
                    outlier=result_.cc < cc_min,
                    xyz=result_.residue.atoms().extract_xyz().mean())
            if result.is_outlier():
                # presumably n_outliers is initialised by validation.__init__
                self.n_outliers += 1
            # XXX unlike other validation metrics, we always save the results
            # for the real-space correlation, since these are used as the
            # basis for the multi-criterion plot in Phenix.  The show() method
            # will only print outliers, however.
            # NOTE(review): this reads result_.residue for both calculation
            # paths, while the map-based branch above reads result_.resname
            # directly - confirm the map-based results also carry .residue.
            resname = result_.residue.resname
            if (resname != 'HOH'):  # water is handled by waters.py
                self.everything.append(result)
                if resname in one_letter_given_three_letter:
                    self.protein.append(result)
                else:
                    self.other.append(result)
        # Done once after the loop, so it also happens when rsc is empty.
        self.everything += self.water
        self.results = self.protein
Example #8
0
def run(args,
        command_name="mmtbx.model_vs_data",
        show_geometry_statistics=True,
        model_size_max_atoms=80000,
        data_size_max_reflections=1000000,
        unit_cell_max_dimension=800.,
        return_fmodel_and_pdb=False,
        out=None,
        log=sys.stdout):
    """Compare a PDB model against reflection data and collect the statistics.

    The inputs are parsed with ``utils.process_command_line_args``, an
    ``fmodel`` is built with ``utils.fmodel_simple``, and crystal, geometry,
    PDB-header and model-vs-data information is gathered into an ``mvd``
    object which is shown on ``out`` and returned.

    Arguments:
      args: command-line arguments.  An empty list or ``["--help"]`` prints
        the module-level ``msg`` and the default parameters to ``log`` and
        returns ``None``.
      command_name: accepted for interface compatibility; not read here.
      show_geometry_statistics: forwarded to ``show_geometry``; its negation
        is passed as ``ignore_unknown_nonbonded_energy_types`` when the PDB
        file is processed.
      model_size_max_atoms: accepted for interface compatibility; not read
        here.
      data_size_max_reflections: upper limit on the number of reflections,
        enforced only if ``params.ignore_giant_models_and_datasets`` is set.
      unit_cell_max_dimension: upper limit on the longest unit cell edge,
        enforced only if ``params.ignore_giant_models_and_datasets`` is set.
      return_fmodel_and_pdb: if true, the processed PDB file and the fmodel
        are attached to the result as ``pdb_file`` and ``fmodel``.
      out: stream handed to ``mvd_obj.show`` and to the CC* report.
      log: stream for the help text, input processing, the statistics in
        resolution bins and the map correlation report.

    Returns:
      The populated ``mvd`` object, or ``None`` when only help was requested.

    Raises:
      Sorry: no reflection file, crystal symmetry or PDB file was found, or
        the data set / unit cell exceeds the configured limits.

    Side effects:
      Writes one ``<prefix>_<map type>_map_coeffs.mtz`` file per entry of
      ``params.map`` and, if ``params.dump_result_object_as_pickle`` is set,
      a ``<prefixes>.pickle`` file holding the result object.
    """
    import mmtbx.f_model.f_model_info
    if (len(args) == 0) or (args == ["--help"]):
        print >> log, msg
        defaults(log=log, silent=False)
        return
    parsed = defaults(log=log, silent=True)
    #
    mvd_obj = mvd()
    #
    processed_args = utils.process_command_line_args(args=args,
                                                     log=log,
                                                     master_params=parsed)
    params = processed_args.params.extract()
    #
    # All three kinds of input are mandatory.
    reflection_files = processed_args.reflection_files
    if (len(reflection_files) == 0):
        raise Sorry("No reflection file found.")
    crystal_symmetry = processed_args.crystal_symmetry
    if (crystal_symmetry is None):
        raise Sorry("No crystal symmetry found.")
    pdb_file_names = processed_args.pdb_file_names
    if (len(pdb_file_names) == 0):
        raise Sorry("No PDB file found.")
    #
    rfs = reflection_file_server(crystal_symmetry=crystal_symmetry,
                                 reflection_files=reflection_files)
    parameters = utils.data_and_flags_master_params().extract()
    if (params.f_obs_label is not None):
        parameters.labels = params.f_obs_label
    if (params.r_free_flags_label is not None):
        parameters.r_free_flags.label = params.r_free_flags_label
    if (params.high_resolution is not None):
        parameters.high_resolution = params.high_resolution
    # The log of this step goes into a throw-away buffer.
    data_and_flags = utils.determine_data_and_flags(
        reflection_file_server=rfs,
        parameters=parameters,
        data_parameter_scope="refinement.input.xray_data",
        flags_parameter_scope="refinement.input.xray_data.r_free_flags",
        data_description="X-ray data",
        keep_going=True,
        log=StringIO())
    f_obs = data_and_flags.f_obs
    number_of_reflections = f_obs.indices().size()
    if (params.ignore_giant_models_and_datasets
            and number_of_reflections > data_size_max_reflections):
        raise Sorry("Too many reflections: %d" % number_of_reflections)
    #
    max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
    if (params.ignore_giant_models_and_datasets
            and max_unit_cell_dimension > unit_cell_max_dimension):
        raise Sorry("Too large unit cell (max dimension): %s" %
                    str(max_unit_cell_dimension))
    #
    r_free_flags = data_and_flags.r_free_flags
    test_flag_value = data_and_flags.test_flag_value
    if (r_free_flags is None):
        # No free set available: flag every reflection as "work".
        r_free_flags = f_obs.array(data=flex.bool(f_obs.data().size(), False))
        test_flag_value = None
    #
    mmtbx_pdb_file = mmtbx.utils.pdb_file(
        pdb_file_names=pdb_file_names,
        cif_objects=processed_args.cif_objects,
        crystal_symmetry=crystal_symmetry,
        use_neutron_distances=(params.scattering_table == "neutron"),
        ignore_unknown_nonbonded_energy_types=not show_geometry_statistics,
        log=log)
    mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels=False)
    processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
    pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
    pdb_inp = mmtbx_pdb_file.pdb_inp
    #
    # just to avoid going any further with bad PDB file....
    pdb_inp.xray_structures_simple()
    #
    acp = processed_pdb_file.all_chain_proxies
    atom_selections = group_args(
        all=acp.selection(string="all"),
        macromolecule=acp.selection(string="protein or dna or rna"),
        solvent=acp.selection(string="water"),  # XXX single_atom_residue
        ligand=acp.selection(string="not (protein or dna or rna or water)"),
        backbone=acp.selection(string="backbone"),
        sidechain=acp.selection(string="sidechain"))
    #
    # The experiment type recorded in the PDB file overrides the requested
    # scattering table when it mentions neutron data.
    scattering_table = params.scattering_table
    exptl_method = pdb_inp.get_experiment_type()
    if (exptl_method is not None) and ("NEUTRON" in exptl_method):
        scattering_table = "neutron"
    xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        d_min=f_obs.d_min())
    xray_structures = xsfppf.xray_structures
    if (0):  #XXX normalize occupancies if all models have occ=1 so the total=1
        n_models = len(xray_structures)
        for xrs in xray_structures:
            occ = xrs.scatterers().extract_occupancies()
            occ = occ / n_models
            xrs.set_occupancies(occ)
    model_selections = xsfppf.model_selections
    mvd_obj.collect(crystal=group_args(
        uc=f_obs.unit_cell(),
        sg=f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
        n_sym_op=f_obs.crystal_symmetry().space_group_info().type().group(
        ).order_z(),
        uc_vol=f_obs.unit_cell().volume()))
    #
    hierarchy = pdb_inp.construct_hierarchy()
    pdb_atoms = hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    #
    # Extract TLS.  The default record carries empty selections; it is
    # replaced below only for a single model with usable TLS records.
    pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
    pdb_tls = group_args(pdb_inp_tls=pdb_inp_tls,
                         tls_selections=[],
                         tls_selection_strings=[])
    # XXX no TLS + multiple models
    if (pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None
            and len(xray_structures) == 1):
        pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
            pdb_inp_tls=pdb_inp_tls,
            all_chain_proxies=mmtbx_pdb_file.processed_pdb_file.
            all_chain_proxies,
            xray_structure=xsfppf.xray_structure_all)
        if (len(pdb_tls.tls_selections) == len(pdb_inp_tls.tls_params)
                and len(pdb_inp_tls.tls_params) > 0):
            xray_structures = [
                utils.extract_tls_and_u_total_from_pdb(
                    f_obs=f_obs,
                    r_free_flags=r_free_flags,
                    xray_structure=xray_structures[
                        0],  # XXX no TLS + multiple models
                    tls_selections=pdb_tls.tls_selections,
                    tls_groups=pdb_inp_tls.tls_params)
            ]
    ###########################
    geometry_statistics = show_geometry(
        xray_structures=xray_structures,
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        hierarchy=hierarchy,
        model_selections=model_selections,
        show_geometry_statistics=show_geometry_statistics,
        mvd_obj=mvd_obj,
        atom_selections=atom_selections)
    ###########################
    mp = mmtbx.masks.mask_master_params.extract()
    # Remember the labels before sorting replaces the f_obs object.
    f_obs_labels = f_obs.info().label_string()
    f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
    r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
    fmodel = utils.fmodel_simple(
        xray_structures=xray_structures,
        scattering_table=scattering_table,
        mask_params=mp,
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        skip_twin_detection=params.skip_twin_detection)
    # Difference between the reflections handed in and those fmodel kept.
    n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
    mvd_obj.collect(model_vs_data=show_model_vs_data(fmodel))
    # Extract information from PDB file header and output (if any)
    pub_r_work = None
    pub_r_free = None
    pub_high = None
    pub_low = None
    pub_sigma = None
    published_results = pdb_inp.get_r_rfree_sigma(file_name=pdb_file_names[0])
    if (published_results is not None):
        pub_r_work = published_results.r_work
        pub_r_free = published_results.r_free
        pub_high = published_results.high
        pub_low = published_results.low
        pub_sigma = published_results.sigma
    pub_program_name = pdb_inp.get_program_name()
    pub_solv_cont = pdb_inp.get_solvent_content()
    pub_matthews = pdb_inp.get_matthews_coeff()
    mvd_obj.collect(pdb_header=group_args(program_name=pub_program_name,
                                          year=pdb_inp.extract_header_year(),
                                          r_work=pub_r_work,
                                          r_free=pub_r_free,
                                          high_resolution=pub_high,
                                          low_resolution=pub_low,
                                          sigma_cutoff=pub_sigma,
                                          matthews_coeff=pub_matthews,
                                          solvent_cont=pub_solv_cont,
                                          tls=pdb_tls,
                                          exptl_method=exptl_method))
    #
    # Recompute R-factors using published cutoffs.  Resolution limits are
    # applied only when they differ from the data by more than 0.03.
    fmodel_cut = fmodel
    tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
    if (pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
        tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas() * pub_sigma
    if (pub_high is not None
            and abs(pub_high - fmodel.f_obs().d_min()) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
    if (pub_low is not None
            and abs(pub_low - fmodel.f_obs().d_max_min()[0]) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
    # A new fmodel is only needed if the cutoffs removed some, but not all,
    # of the reflections.
    if (tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
        fmodel_cut = utils.fmodel_simple(
            xray_structures=xray_structures,
            scattering_table=scattering_table,
            f_obs=fmodel.f_obs().select(tmp_sel),
            r_free_flags=fmodel.r_free_flags().select(tmp_sel),
            skip_twin_detection=params.skip_twin_detection)
    mvd_obj.collect(
        misc=group_args(r_work_cutoff=fmodel_cut.r_work(),
                        r_free_cutoff=fmodel_cut.r_free(),
                        n_refl_cutoff=fmodel_cut.f_obs().data().size()))
    mvd_obj.collect(data=show_data(fmodel=fmodel,
                                   n_outl=n_outl,
                                   test_flag_value=test_flag_value,
                                   f_obs_labels=f_obs_labels,
                                   fmodel_cut=fmodel_cut))
    # CC* and friends
    cc_star_stats = None
    if (params.unmerged_data is not None):
        import mmtbx.validation.experimental
        import mmtbx.command_line
        f_obs = fmodel.f_obs().average_bijvoet_mates()
        unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
            f_obs=f_obs,
            file_name=params.unmerged_data,
            data_labels=params.unmerged_labels,
            log=null_out())
        cc_star_stats = mmtbx.validation.experimental.merging_and_model_statistics(
            f_model=fmodel.f_model().average_bijvoet_mates(),
            f_obs=f_obs,
            r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
            unmerged_i_obs=unmerged_i_obs,
            n_bins=params.n_bins)
    mvd_obj.show(log=out)
    if (cc_star_stats is not None):
        cc_star_stats.show_model_vs_data(out=out, prefix="  ")
    if return_fmodel_and_pdb:
        mvd_obj.pdb_file = processed_pdb_file
        mvd_obj.fmodel = fmodel
    if (len(params.map) > 0):
        # Every map file is named after the first reflection file, so the
        # prefix is worked out once for all requested maps.
        prefix = _prefix_before_first_dot(
            processed_args.reflection_file_names[0])
        for map_name_string in params.map:
            map_type_obj = mmtbx.map_names(map_name_string=map_name_string)
            map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
                mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
            maps_obj = mmtbx.maps.compute_map_coefficients(
                fmodel=fmodel_cut, params=map_params.map_coefficients)
            file_name = prefix + "_%s_map_coeffs.mtz" % map_type_obj.format()
            maps_obj.write_mtz_file(file_name=file_name)
    # statistics in bins
    if (not fmodel.twin):
        print >> log, "Statistics in resolution bins:"
        mmtbx.f_model.f_model_info.r_work_and_completeness_in_resolution_bins(
            fmodel=fmodel, out=log, prefix="  ")
    # report map cc
    if (params.comprehensive and not fmodel_cut.twin
            and fmodel_cut.xray_structure is not None):
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.scattering_table = scattering_table
        real_space_correlation.simple(fmodel=fmodel_cut,
                                      pdb_hierarchy=hierarchy,
                                      params=rsc_params,
                                      log=log,
                                      show_results=True)
    #
    if (params.dump_result_object_as_pickle):
        # The pickle is named after the distinct input-file prefixes, in
        # first-seen order.
        output_prefixes = []
        input_names = (processed_args.pdb_file_names +
                       processed_args.reflection_file_names)
        for input_name in input_names:
            op = _prefix_before_first_dot(input_name)
            if (op not in output_prefixes):
                output_prefixes.append(op)
        output_prefix = "_".join(output_prefixes)
        easy_pickle.dump("%s.pickle" % output_prefix, mvd_obj)
    return mvd_obj


def _prefix_before_first_dot(path):
    """Return the base name of ``path`` cut off at its first "." (if any)."""
    # str.partition yields the whole string when the separator is absent, so
    # no exception handling is needed for names without an extension.
    return os.path.basename(path).partition(".")[0]
Example #9
0
    def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
        """Compute the real-space correlation of ``model`` with a map.

        If ``molprobity_map_params.map_file_name`` is given, the correlation
        is computed against that map file with ``mmtbx.maps.map_model_cc``
        and an FSC object is stored as well.  Otherwise
        ``real_space_correlation.simple`` is run on ``fmodel``.

        Arguments:
          model: object providing ``get_hierarchy()`` and
            ``crystal_symmetry()``.
          fmodel: passed to ``real_space_correlation.simple``; only used when
            no map file is given.
          cc_min: accepted by the constructor; not read in this method.
          molprobity_map_params: optional object providing
            ``map_coefficients_file_name``, ``map_coefficients_label``,
            ``map_file_name`` and ``d_min``.

        Attributes set:
          everything, protein, other, water: empty lists.
          overall_rsc: ``(cc_mask, cc_volume, cc_peaks)`` for a map file,
            otherwise the first value returned by
            ``real_space_correlation.simple``.
          fsc: result of ``get_fsc`` for a map file, otherwise ``None``.

        Any exception raised by the calculation propagates unchanged.
        """
        from mmtbx import real_space_correlation

        validation.__init__(self)

        pdb_hierarchy = model.get_hierarchy()
        # NOTE(review): this value is not read below; the call is kept
        # because the accessor is project code not visible from here.
        crystal_symmetry = model.crystal_symmetry()

        # arrays for different components
        self.everything = list()
        self.protein = list()
        self.other = list()
        self.water = list()

        # redo real_space_correlation.simple to use map objects instead of
        # filenames
        self.overall_rsc = None
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail = "residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if (molprobity_map_params is not None):
            rsc_params.map_coefficients_file_name = \
              molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
              molprobity_map_params.map_coefficients_label
            if (molprobity_map_params.map_file_name is not None):
                use_maps = True
        # use mmtbx/command_line/map_model_cc.py for maps
        self.fsc = None
        if (use_maps):
            from iotbx import map_and_model
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(
                molprobity_map_params.map_file_name).file_object

            # check that model crystal symmetry matches map crystal symmetry
            mmi = map_and_model.input(map_data=map_object.map_data(),
                                      model=model)

            rsc_object = map_model_cc.map_model_cc(
                mmi.map_data(),
                mmi.model().get_hierarchy(), mmi.crystal_symmetry(),
                params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            rsc = rsc_object.get_results()
            self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)

            self.fsc = get_fsc(mmi.map_data(), mmi.model(),
                               params.map_model_cc)
            self.fsc.atom_radius = rsc.atom_radius
            rsc = rsc.cc_per_residue
        # mmtbx/real_space_correlation.py for X-ray/neutron data and map
        # coefficients
        else:
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())
Example #10
0
  def __init__ (self, fmodel, pdb_hierarchy, crystal_symmetry=None, cc_min=0.8,
                molprobity_map_params=None) :
    """
    Compute the real-space correlation of a model hierarchy with a map.

    If molprobity_map_params.map_file_name is given, the correlation is
    computed against that map file with mmtbx.maps.map_model_cc and an FSC
    object is stored in self.fsc.  Otherwise real_space_correlation.simple is
    run on fmodel and pdb_hierarchy.

    Arguments:
      fmodel: passed to real_space_correlation.simple (no map file case).
      pdb_hierarchy: model hierarchy used by both calculations.
      crystal_symmetry: symmetry used for the map file calculation; replaced
        by the symmetry read from the map file when the two are not similar.
      cc_min: not read in the statements shown here.
      molprobity_map_params: optional object providing
        map_coefficients_file_name, map_coefficients_label, map_file_name
        and d_min.
    """

    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    self.overall_rsc = None
    rsc = None
    try :
      rsc_params = real_space_correlation.master_params().extract()
      rsc_params.detail="residue"
      rsc_params.map_1.fill_missing_reflections = False
      rsc_params.map_2.fill_missing_reflections = False
      use_maps = False
      if (molprobity_map_params is not None):
        rsc_params.map_coefficients_file_name = \
          molprobity_map_params.map_coefficients_file_name
        rsc_params.map_coefficients_label = \
          molprobity_map_params.map_coefficients_label
        # a map file switches the calculation to map_model_cc below
        if (molprobity_map_params.map_file_name is not None):
          use_maps = True
      # use mmtbx/command_line/map_model_cc.py for maps
      self.fsc = None
      if (use_maps):
        from scitbx.array_family import flex
        import iotbx.pdb
        from mmtbx.maps import map_model_cc
        from mmtbx.command_line.map_model_cc import get_fsc
        from iotbx.file_reader import any_file
        from cctbx import crystal, sgtbx
        params = map_model_cc.master_params().extract()
        params.map_model_cc.resolution = molprobity_map_params.d_min
        map_object = any_file(molprobity_map_params.map_file_name).file_object

        # ---------------------------------------------------------------------
        # check that model crystal symmetry matches map crystal symmetry
        # if inconsistent, map parameters take precedence
        # TODO: centralize data consistency checks prior to running validation
        map_crystal_symmetry = crystal.symmetry(
          unit_cell=map_object.unit_cell(),
          space_group=sgtbx.space_group_info(
            map_object.space_group_number).group())
        # NOTE(review): crystal_symmetry defaults to None -- confirm that
        # is_similar_symmetry accepts None.
        if (not map_crystal_symmetry.is_similar_symmetry(crystal_symmetry)):
          crystal_symmetry = map_crystal_symmetry

        # ---------------------------------------------------------------------

        map_data = map_object.map_data()
        rsc_object = map_model_cc.map_model_cc(
          map_data, pdb_hierarchy, crystal_symmetry, params.map_model_cc)
        rsc_object.validate()
        rsc_object.run()
        rsc = rsc_object.get_results()
        self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)

        # pdb_hierarchy.as_pdb_input is being phased out since that function
        # just re-processes the file from text and can be lossy
        # this is a placeholder until tools get updated to use the model class
        pdb_input = iotbx.pdb.input(
          source_info='pdb_hierarchy',
          lines=flex.split_lines(pdb_hierarchy.as_pdb_string()))
        # NOTE(review): mmtbx is not imported inside this method; presumably
        # mmtbx.model is imported at module level -- verify.
        model = mmtbx.model.manager(model_input = pdb_input)
        self.fsc = get_fsc(map_data, model, params.map_model_cc)
        #

        self.fsc.atom_radius = rsc.atom_radius
        # from here on rsc refers to the per-residue results only
        rsc = rsc.cc_per_residue
      # mmtbx/real_space_correlation.py for X-ray/neutron data and map
      # coefficients
      else:
        self.overall_rsc, rsc = real_space_correlation.simple(
          fmodel=fmodel,
          pdb_hierarchy=pdb_hierarchy,
          params=rsc_params,
          log=null_out())
    # Python 2 "except X, name" syntax; the handler re-raises the exception
    # unchanged and the bound name e is not used.
    except Exception, e :
      raise