def run(args, out=sys.stdout):
    """Command-line driver: report validation statistics for named ligands.

    Any argument that is exactly three alphanumeric characters and does not
    name an existing path is rewritten as ``ligand_code=<arg>`` before the
    argument list is handed to ``mmtbx.command_line.load_model_and_data``.

    Raises ``Sorry`` if no ligand code ends up in the parameters, or if
    ``validate_ligands`` returns None for one of the requested codes.
    """
    usage_string = """\
mmtbx.validate_ligands model.pdb data.mtz LIGAND_CODE [...]

Print out basic statistics for residue(s) with the given code(s), including
electron density values/CC.
"""
    import mmtbx.validation.ligands
    import mmtbx.command_line

    def _looks_like_ligand_code(token):
        # The file-system check is only reached for three-character
        # alphanumeric tokens (short-circuit evaluation).
        return (len(token) == 3) and token.isalnum() and \
            (not os.path.exists(token))

    processed_args = [
        ("ligand_code=%s" % token) if _looks_like_ligand_code(token) else token
        for token in args
    ]
    cmdline = mmtbx.command_line.load_model_and_data(
        args=processed_args,
        master_phil=master_phil(),
        process_pdb_file=False,
        usage_string=usage_string)
    params = cmdline.params
    requested_codes = params.ligand_code
    if (requested_codes is None) or (len(requested_codes) == 0):
        raise Sorry("Ligand code required!")
    make_sub_header("Validating ligands", out=out)
    ligand_validation = mmtbx.validation.ligands
    for code in requested_codes:
        results = ligand_validation.validate_ligands(
            pdb_hierarchy=cmdline.pdb_hierarchy,
            fmodel=cmdline.fmodel,
            ligand_code=code,
            reference_structure=params.reference_structure,
            only_segid=params.only_segid)
        if results is None:
            raise Sorry("No ligands named '%s' found." % code)
        ligand_validation.show_validation_results(
            validations=results, out=out, verbose=params.verbose)
Exemple #2
0
 def run(self):
     """Add hydrogens to the model, optimize them and write the result.

     The model comes from ``self.data_manager``.  Hydrogens are placed with
     ``reduce_hydrogen.place_hydrogens`` and the model is then passed through
     ``reduce_hydrogen.optimize``.  The final model is written as PDB text to
     ``<base>_hydrogenate.pdb``, where ``<base>`` is
     ``params.output.file_name_prefix`` when set, otherwise the default model
     file name stripped of its directory and extension.
     """
     self.model = self.data_manager.get_model()
     #
     make_sub_header('Add H atoms', out=self.logger)
     reduce_add_h_obj = reduce_hydrogen.place_hydrogens(
         model=self.model,
         use_neutron_distances=self.params.use_neutron_distances,
         n_terminal_charge=self.params.n_terminal_charge)
     # Profiling hook kept for developers; enable to time the placement step.
     #import line_profiler
     #lp = line_profiler.LineProfiler(reduce_add_h_obj.run)
     #lp.enable()
     reduce_add_h_obj.run()
     #lp.disable()
     #lp.print_stats()
     self.model = reduce_add_h_obj.get_model()
     reduce_add_h_obj.show(log=self.logger)
     #
     make_sub_header('Optimize H atoms', out=self.logger)
     self.model = reduce_hydrogen.optimize(model=self.model)
     #
     if (self.params.output.file_name_prefix is not None):
         base = self.params.output.file_name_prefix
     else:
         fp = self.data_manager.get_default_model_name()
         base = os.path.splitext(os.path.basename(fp))[0]
     # The context manager closes the file even if serialising the model or
     # the write itself raises.
     with open("%s_hydrogenate.pdb" % base, "w") as of:
         of.write(self.model.model_as_pdb())
 def show(self, log=null_out(), show_clashscore=True):
   """
   Print all clashes in a table.

   Writes a short summary (number of clashes, number due to symmetry and,
   optionally, the clashscore) followed by one table row per entry of
   self._clashes_dict.  Prints 'No clashes found' when that dict is empty.

   log: file-like object receiving the output.
   show_clashscore: when False the clashscore line is omitted.
   """
   make_sub_header(' Nonbonded overlaps', out=log)
   if not self._clashes_dict:
     print('No clashes found', file=log)
     return
   # General information
   results = self.get_results()
   result_str = '{:<18} : {:5d}'
   print(result_str.format(' Number of clashes', results.n_clashes), file=log)
   print(result_str.format(' Number of clashes due to symmetry', results.n_clashes_sym), file=log)
   result_str = '{:<18} : {:5.2f}'
   if show_clashscore:
     print(result_str.format(' Clashscore', results.clashscore), file=log)
   # print table with all overlaps
   labels =  ["Overlapping residues info","model distance","overlap",
              "symmetry"]
   lbl_str = '{:^33}|{:^16}|{:^11}|{:^15}'
   table_str = '{:>16}|{:>16}|{:^16.2f}|{:^11.2}|{:^15}|'
   print('\n' + lbl_str.format(*labels), file=log)
   print('-'*78, file=log)
   atoms = self.model.get_atoms()
   # items() rather than the Python 2-only iteritems(): works on both
   # Python 2 and Python 3 dictionaries.
   for iseq_tuple, record in self._clashes_dict.items():
     i_seq, j_seq = iseq_tuple
     # record is used by position: [0] goes in the "model distance" column,
     # [2] in "overlap" and [4] in "symmetry" (blank when it is None).
     overlap = record[2]
     symop = record[4] if record[4] is not None else ''
     i_id_str = atoms[i_seq].id_str().replace('pdb=','').replace('"','')
     j_id_str = atoms[j_seq].id_str().replace('pdb=','').replace('"','')
     line = [i_id_str, j_id_str,round(record[0], 2),round(overlap, 2), symop]
     print(table_str.format(*line), file=log)
   print('-'*78, file=log)
Exemple #4
0
    def show_cc_star(self, out=None):
        """Print the CC* table: a header, one row per bin, then the overall row.

        Rows come from each bin's ``format_for_model_cc()`` and from
        ``self.overall.format_for_model_cc()``.
        """
        make_sub_header("CC* and related statistics", out=out)
        column_header = " d_max   d_min  n_uniq  compl. <I/sI>  cc_1/2    cc* cc_work cc_free r_work r_free"
        print >> out, column_header
        for shell in self.bins:
            print >> out, shell.format_for_model_cc()
        print >> out, self.overall.format_for_model_cc()
 def show_estimated_cutoffs (self, out=sys.stdout, prefix="") :
   """
   Print resolution cutoff estimates obtained from self.estimate_d_min()
   for several criteria, followed by a recommendation note.  Each report
   line is preceded by prefix; a None estimate is shown as
   "(use all data)".
   """
   def format_d_min (value) :
     if (value is None) :
       return "(use all data)" #% self.d_min_overall
     return "%7.3f" % value
   print >> out, ""
   print >> out, ""
   make_sub_header("Resolution cutoff estimates", out=out)
   print >> out, prefix + "  resolution of all data          : %7.3f" % \
     self.overall.d_min
   estimate = self.estimate_d_min
   cc_one_half_cut = estimate(min_cc_one_half=0.33)
   i_over_sigma_cut = estimate(min_i_over_sigma=2.0)
   r_merge_cut = estimate(max_r_merge=0.5)
   r_meas_cut = estimate(max_r_meas=0.5)
   # evaluated but not reported below
   cc_anom_cut = estimate(min_cc_anom=0.3)
   completeness_cut_conservative = estimate(min_completeness=0.9)
   completeness_cut_permissive = estimate(min_completeness=0.5)
   report = (
     ("  based on CC(1/2) >= 0.33        : %s", cc_one_half_cut),
     ("  based on mean(I/sigma) >= 2.0   : %s", i_over_sigma_cut),
     ("  based on R-merge < 0.5          : %s", r_merge_cut),
     ("  based on R-meas < 0.5           : %s", r_meas_cut),
     ("  based on completeness >= 90%%    : %s",
      completeness_cut_conservative),
     ("  based on completeness >= 50%%    : %s",
      completeness_cut_permissive),
   )
   for template, d_min in report :
     print >> out, prefix + template % format_d_min(d_min)
   print >> out, ""
   print >> out, "NOTE: we recommend using all data out to the CC(1/2) limit"
   print >> out, "for refinement."
 def show (self, out=sys.stdout, prefix="", outliers_only=None,
     verbose=True) :
   """
   Show every result object named in self.__slots__, each under its own
   sub-header.  When outliers_only is true, results without outliers are
   skipped.  prefix and verbose are accepted but not used here.
   """
   for slot_name in self.__slots__ :
     result = getattr(self, slot_name)
     if (not (result.n_outliers > 0)) and outliers_only :
       continue
     make_sub_header(result.label, out=out)
     result.show(out=out)
Exemple #7
0
 def print_adps(self):
     """Print min/max/mean ADP and iso/aniso counts for every ligand.

     Iterates ``self.items()``; each value is a mapping of altloc to a ligand
     result object.  When a ligand has a single entry the altloc column is
     left out and a wider pad is used; otherwise one row is printed per
     altloc.  Output goes to ``self.log``.
     """
     make_sub_header(' ADPs ', out=self.log)
     pad1 = ' ' * 20
     print(pad1, "min   max    mean   n_iso   n_aniso", file=self.log)
     for id_tuple, ligand_dict in self.items():
         # Iterating items() also covers the single-entry case; the former
         # ``ligand_dict.values()[0]`` fails on Python 3 dict views.
         single_entry = (len(ligand_dict) == 1)
         for altloc, lr in ligand_dict.items():
             adps = lr.get_adps()
             stats = '%7s%7s%7s%7s%7s' % (
                 round(adps.b_min, 1), round(adps.b_max, 1),
                 round(adps.b_mean, 1), adps.n_iso, adps.n_aniso)
             if single_entry:
                 print(lr.resname, lr.id_str, ' ' * 4, stats, file=self.log)
             else:
                 print(lr.resname, lr.id_str, altloc, ' ' * 2, stats,
                       file=self.log)
  def show_cc_star (self, out=None) :
    """
    Print the CC* table: a header, one row per bin, then the overall row.
    Rows come from format_for_model_cc() of each bin and of self.overall.
    """
    make_sub_header("CC* and related statistics", out=out)
    column_header = " d_max   d_min  n_uniq  compl. <I/sI>  cc_1/2    cc* cc_work cc_free r_work r_free"
    print >> out, column_header
    for shell in self.bins :
      print >> out, shell.format_for_model_cc()
    print >> out, self.overall.format_for_model_cc()
Exemple #9
0
 def warn (self, text) :
   """
   Record text in self._warnings and write it to self.out underneath a
   "WARNING" banner.  The banner is rendered into a temporary buffer so
   trailing whitespace can be stripped from each of its lines.
   """
   self._warnings.append(text)
   banner = StringIO()
   make_sub_header("WARNING", out=banner, sep='*')
   stripped_lines = [ln.rstrip() for ln in banner.getvalue().splitlines()]
   for ln in stripped_lines :
     self.out.write(ln + "\n")
   self.out.write(text)
Exemple #10
0
  def run(self):
    """Run ligand validation on the model held by the data manager.

    Steps, in order: report the input file names, build an fmodel when a
    default reflection file name is available, print the working crystal
    symmetry, run the new ``validate_ligands.manager`` and print its
    summaries, and finally run the older per-ligand-code validation when
    ligand codes and reflection data are both present.

    Raises ``Sorry`` when the older validation finds no ligand for a
    requested code.
    """

    # NOTE: these two lines go to stdout, not to self.logger.
    print('Using model file:', self.data_manager.get_default_model_name())
    print('Using reflection file:', self.data_manager.get_default_miller_array_name())

    cs = self.get_crystal_symmetry()
    model = self.data_manager.get_model()
    ph = model.get_hierarchy()
    xrs = model.get_xray_structure()

    # fmodel is only bound inside this branch.
    if self.data_manager.get_default_miller_array_name():
      f_obs, r_free_flags = self.get_fobs_rfree(crystal_symmetry = cs)
      print('\nInput data...', file=self.logger)
      print('  Reflection data:', f_obs.info().labels, file=self.logger)
      if (r_free_flags is not None):
        print('  Free-R flags:', r_free_flags.info().labels, file=self.logger)
      else:
        print('  Free-R flags: not present or not found', file=self.logger)
      fmodel = mmtbx.f_model.manager(
       f_obs          = f_obs,
       r_free_flags   = r_free_flags,
       xray_structure = xrs)
      # TODO: delete this keyword for production
      if self.params.update_scales:
        fmodel.update_all_scales()

    print('\nWorking crystal symmetry after inspecting all inputs:', file=self.logger)
    cs.show_summary(f=self.logger)

    # This is the new class, currently a stub but will be developed
    # winter 2018/spring 2019 by DL and NWM
    #t0 = time.time()
    ligand_manager = validate_ligands.manager(
      model = model,
      nproc = self.params.nproc,
      log   = self.logger)
    ligand_manager.run()
    ligand_manager.print_ligand_counts()
    ligand_manager.print_ligand_occupancies()
    ligand_manager.print_adps()
    #print('time running manager: ', time.time()-t0)

    # TODO
    # DL: Eventually, delete "old" call below, but leave it for now to keep the
    # functionality alive, just in case
    # NOTE(review): this guard tests the reflection file name with
    # ``is not None`` while the branch that binds fmodel above tests plain
    # truthiness; a falsy-but-not-None name would reach the use of fmodel
    # below without it being defined -- confirm that cannot happen.
    if self.params.ligand_code and self.data_manager.get_default_miller_array_name() is not None:
      # Also skips the case where the first ligand code entry is None.
      if (not(self.params.ligand_code is None or self.params.ligand_code[0] is None)):
        make_sub_header("Validating ligands", out=self.logger)
        for ligand_code in self.params.ligand_code :
          validations = mmtbx.validation.ligands.validate_ligands(
            pdb_hierarchy       = ph,
            fmodel              = fmodel,
            ligand_code         = ligand_code,
            reference_structure = self.params.reference_structure,
            only_segid          = self.params.only_segid)
          if (validations is None):
            raise Sorry("No ligands named '%s' found." % ligand_code)
          mmtbx.validation.ligands.show_validation_results(validations=validations,
            out     = self.logger,
            verbose = self.params.verbose)
Exemple #11
0
 def refine (self, title="Refining multi-conformer model",
     constrain_occupancies=Auto) :
   """
   Run one refinement cycle through phenix.automation.refinement and adopt
   the refined hierarchy, fmodel and map file.  Increments
   self.refine_cycle and reports the elapsed time on self.out.

   When constrain_occupancies is true and the refinement parameters ask for
   it, "constrain_correlated_3d_groups=True" is passed as an extra
   argument; when it is false a notice is printed instead.
   """
   make_sub_header(title, out=self.out)
   started = time.time()
   extra_args = []
   if not constrain_occupancies :
     print >> self.out, "  Correlated occupancies will *not* be constrained"
   elif (self.params.refinement.constrain_correlated_occupancies) :
     extra_args.append("constrain_correlated_3d_groups=True")
   from phenix.automation import refinement
   refined = refinement.refine_hires_simple(
     pdb_hierarchy=self.pdb_hierarchy,
     crystal_symmetry=self.fmodel.xray_structure,
     fmodel=self.fmodel,
     params=self.params.refinement,
     cif_files=self.cif_files,
     cycle=self.refine_cycle,
     extra_args=extra_args,
     out=self.out) # TODO need a verbosity flag
   finished = time.time()
   print >> self.out, "  refinement time: %.3fs" % (finished-started)
   print >> self.out, ""
   self.pdb_hierarchy = refined.pdb_hierarchy
   self.fmodel = refined.fmodel
   self.fmodel.info().show_targets(out=self.out, text="refined model")
   self.map_file = refined.map_file
   self.refine_cycle += 1
Exemple #12
0
  def print_overall_results(self, overall_counts_hd, prefix='', log=None):
    """Print the overall H/D atom counts and the water statistics.

    The two (label, value) tables are stored on ``self.hd_overall_values``
    and ``self.hd_water_values`` and printed with ``self.formatted_print``.
    ``log`` falls back to ``self.log`` when None.
    """
    target = self.log if (log is None) else log
    counts = overall_counts_hd

    make_sub_header('H/D atoms in the input model', out=target)
    atom_rows = [
      ('Total number of hydrogen atoms', counts.count_h),
      ('Total number of deuterium atoms', counts.count_d),
      ('Number of H atoms (protein)', counts.count_h_protein),
      ('Number of D atoms (protein)', counts.count_d_protein),
      ('Number of H atoms (water)', counts.count_h_water),
      ('Number of D atoms (water)', counts.count_d_water),
      ('Number of H atoms (other)', counts.count_h_other),
      ('Number of D atoms (other)', counts.count_d_other),
    ]
    self.hd_overall_values = atom_rows
    self.formatted_print(prefix, self.hd_overall_values, target)

    make_sub_header('Water molecules', out=target)
    water_rows = [
      ('Number of water', counts.count_water),
      ('Number of water with 0 H (or D)', counts.count_water_0h),
      ('Number of water with 1 H (or D)', counts.count_water_1h),
      ('Number of water with 2 H (or D)', counts.count_water_2h),
      ('Number of water in alternative conformation',
       counts.count_water_altconf),
      ('Number of water without oxygen atom', counts.count_water_no_oxygen),
    ]
    self.hd_water_values = water_rows
    self.formatted_print(prefix, self.hd_water_values, target)
 def show (self, out=sys.stdout, prefix="", outliers_only=None,
     verbose=True) :
   """
   Show every result object named in self.__slots__, each under its own
   sub-header.  When outliers_only is true, results without outliers are
   skipped.  prefix and verbose are accepted but not used here.
   """
   for slot_name in self.__slots__ :
     result = getattr(self, slot_name)
     if (not (result.n_outliers > 0)) and outliers_only :
       continue
     make_sub_header(result.label, out=out)
     result.show(out=out)
Exemple #14
0
    def show_estimated_cutoffs(self, out=sys.stdout, prefix=""):
        """Print resolution cutoff estimates for several criteria.

        Estimates come from ``self.estimate_d_min``; each report line is
        preceded by ``prefix`` and a None estimate is shown as
        "(use all data)".  A recommendation note is printed at the end.
        """

        def format_d_min(value):
            if (value is None):
                return "(use all data)"  #% self.d_min_overall
            return "%7.3f" % value

        print >> out, ""
        print >> out, ""
        make_sub_header("Resolution cutoff estimates", out=out)
        print >> out, prefix + "  resolution of all data          : %7.3f" % \
          self.overall.d_min
        estimate = self.estimate_d_min
        cc_one_half_cut = estimate(min_cc_one_half=0.33)
        i_over_sigma_cut = estimate(min_i_over_sigma=2.0)
        r_merge_cut = estimate(max_r_merge=0.5)
        r_meas_cut = estimate(max_r_meas=0.5)
        # evaluated but not reported below
        cc_anom_cut = estimate(min_cc_anom=0.3)
        completeness_cut_conservative = estimate(min_completeness=0.9)
        completeness_cut_permissive = estimate(min_completeness=0.5)
        report = (
            ("  based on CC(1/2) >= 0.33        : %s", cc_one_half_cut),
            ("  based on mean(I/sigma) >= 2.0   : %s", i_over_sigma_cut),
            ("  based on R-merge < 0.5          : %s", r_merge_cut),
            ("  based on R-meas < 0.5           : %s", r_meas_cut),
            ("  based on completeness >= 90%%    : %s",
             completeness_cut_conservative),
            ("  based on completeness >= 50%%    : %s",
             completeness_cut_permissive),
        )
        for template, d_min in report:
            print >> out, prefix + template % format_d_min(d_min)
        print >> out, ""
        print >> out, "NOTE: we recommend using all data out to the CC(1/2) limit"
        print >> out, "for refinement."
 def rejoin(self):
     """Re-join split conformers and report whether anything changed.

     Works on a deep copy of ``self.pdb_hierarchy``; the copy replaces the
     current hierarchy only when ``rejoin_split_single_conformers`` reports
     at least one modification, in which case the fmodel's xray structure is
     updated and ``self.map_file`` is reset to None.  ``finalize_model`` is
     called on the current hierarchy in either case.

     Returns True when at least one modification was made.
     """
     make_sub_header("Re-joining identical conformers", out=self.out)
     working_hierarchy = self.pdb_hierarchy.deep_copy()
     refinement_params = self.params.refinement
     n_modified = alternate_conformations.rejoin_split_single_conformers(
         pdb_hierarchy=working_hierarchy,
         crystal_symmetry=self.fmodel.xray_structure,
         model_error_ml=self.fmodel.model_error_ml(),
         params=self.params.merging,
         reset_occupancies=refinement_params.constrain_correlated_occupancies,
         verbose=self.verbose,
         log=self.out)
     if (n_modified > 0):
         self.pdb_hierarchy = working_hierarchy
         updated_xrs = self.pdb_hierarchy.extract_xray_structure(
             crystal_symmetry=self.fmodel.xray_structure)
         self.fmodel.update_xray_structure(updated_xrs)
         self.map_file = None
     final_xrs = self.pdb_hierarchy.extract_xray_structure(
         crystal_symmetry=self.fmodel.xray_structure)
     alternate_conformations.finalize_model(
         pdb_hierarchy=self.pdb_hierarchy,
         xray_structure=final_xrs,
         set_b_iso=None,
         convert_to_isotropic=False)
     return (n_modified > 0)
Exemple #16
0
 def warn(self, text):
   """
   Record text in self._warnings and write it to self.out underneath a
   "WARNING" banner.  The banner is rendered into a temporary buffer so
   trailing whitespace can be stripped from each of its lines.
   """
   self._warnings.append(text)
   banner = StringIO()
   make_sub_header("WARNING", out=banner, sep='*')
   stripped_lines = [ln.rstrip() for ln in banner.getvalue().splitlines()]
   for ln in stripped_lines:
     self.out.write(ln + "\n")
   self.out.write(text)
Exemple #17
0
  def print_missing_HD_atoms(self, missing_HD_atoms, prefix, log=None):
    """Print one line per entry of ``missing_HD_atoms``.

    Each entry is indexed by position: ``[0]`` is a string that is printed
    without its first eight characters and its last one, ``[1]`` is an
    iterable of strings joined with ", ", and ``[2]`` is printed after the
    word "conformer".  ``log`` falls back to ``self.log`` when None.
    """
    target = self.log if (log is None) else log

    make_sub_header('MISSING H or D atoms', out=target)
    for entry in missing_HD_atoms:
      message = '%s%s conformer %s : %s ' % (
        prefix, entry[0][8:-1], entry[2], ", ".join(entry[1]))
      print(message, file=target)
 def show(self, out=sys.stdout, prefix="", verbose=True):
     """Show each result named in ``self.__geo_types__`` under a sub-header.

     The sub-header is the result's ``restraint_label`` with an "s" appended.
     For the "angles" entry a notice is printed first when the instance has
     a true ``_use_cdl`` attribute.  ``verbose`` is accepted but not used.
     """
     for type_name in self.__geo_types__:
         result = getattr(self, type_name)
         make_sub_header(result.restraint_label + "s", out=out)
         uses_cdl = (type_name == "angles") and \
             getattr(self, "_use_cdl", False)
         if uses_cdl:
             print >> out, ("  Using conformation-dependent library for "
                            "mainchain bond angle targets")
             print >> out, ""
         result.show(out=out, prefix=prefix)
Exemple #19
0
 def show (self, out=sys.stdout, prefix="", verbose=True) :
   """
   Show each result named in self.__geo_types__ under a sub-header made of
   its restraint_label plus "s".  For the "angles" entry a notice is
   printed first when the instance has a true _use_cdl attribute.
   verbose is accepted but not used.
   """
   for type_name in self.__geo_types__ :
     result = getattr(self, type_name)
     make_sub_header(result.restraint_label + "s", out=out)
     uses_cdl = (type_name == "angles") and \
       getattr(self, "_use_cdl", False)
     if uses_cdl :
       print >> out, ("  Using conformation-dependent library for "
                      "mainchain bond angle targets")
       print >> out, ""
     result.show(out=out, prefix=prefix)
Exemple #20
0
    def print_results_hd_sites(self,
                               count_exchanged_sites,
                               hd_sites_analysis,
                               overall_counts_hd,
                               prefix='',
                               log=None):
        """Print the H/D exchanged-site summary and any problem lists.

        The summary table is stored on ``self.hd_exchange_values`` and
        printed with ``self.formatted_print``.  Each of the four lists taken
        from ``hd_sites_analysis`` is printed under its own title only when
        it is non-empty.  ``log`` falls back to ``self.log`` when None.
        """
        target = self.log if (log is None) else log

        different_xyz = hd_sites_analysis.sites_different_xyz
        different_b = hd_sites_analysis.sites_different_b
        occ_sum_not_1 = hd_sites_analysis.sites_sum_occ_not_1
        occ_sum_no_scattering = hd_sites_analysis.sites_occ_sum_no_scattering

        def _title(text):
            # Blank line, then the prefixed section title.
            print('\n%s%s' % (prefix, text), file=target)

        make_sub_header('H/D EXCHANGED SITES', out=target)
        self.hd_exchange_values = [
            ('Number of H/D exchanged sites', count_exchanged_sites),
            ('Number of atoms modelled only as H',
             overall_counts_hd.count_h_protein - count_exchanged_sites),
            ('Number of atoms modelled only as D',
             overall_counts_hd.count_d_protein - count_exchanged_sites)
        ]
        self.formatted_print(prefix, self.hd_exchange_values, target)

        if different_xyz:
            _title('H/D pairs not at identical positions:')
            for pair in different_xyz:
                row = (prefix, pair[0][5:-1], pair[1][5:-1], pair[2])
                print('%s  %s and  %s at distance %.3f' % row, file=target)

        if different_b:
            _title('H/D pairs without identical ADPs:')
            for pair in different_b:
                row = (prefix, pair[0][5:-1], pair[1][5:-1])
                print('%s  %s and %s ' % row, file=target)

        if occ_sum_not_1:
            _title('H/D pairs with occupancy sum != 1:')
            for pair in occ_sum_not_1:
                row = (prefix, pair[0][5:-1], pair[1][5:-1], pair[2])
                print('%s  %s  and %s with occupancy sum %s' % row,
                      file=target)

        if occ_sum_no_scattering:
            _title('Rotatable H/D pairs with zero scattering occupancy sum:')
            for pair in occ_sum_no_scattering:
                row = (prefix, pair[0][5:-1], pair[2], pair[1][5:-1], pair[3])
                print('%s  %s with occ %s and  %s with occ %s' % row,
                      file=target)
Exemple #21
0
  def print_renamed(self, renamed, prefix='', log=None):
    """Print one "old --> new" line for every entry of ``renamed``.

    Entries are indexed by position: ``[0]`` is the id string, ``[2]`` the
    old atom name and ``[1]`` the new one.  ``log`` falls back to
    ``self.log`` when None.
    """
    target = self.log if (log is None) else log

    make_sub_header('The following atoms were renamed:', out=target)
    for record in renamed:
      # Note the order: the new name is stored before the old one.
      atom_id, new_name, old_name = record[0], record[1], record[2]
      message = '%s%s atom %s --> %s' % (prefix, atom_id, old_name, new_name)
      print(message, file=target)
Exemple #22
0
 def __init__ (self,
     fmodel,
     pdb_hierarchy,
     params=None,
     processed_pdb_file=None,
     geometry_restraints_manager=None,
     cif_objects=(),
     cif_files=(), # XXX bug
     debug=None,
     verbose=True,
     out=sys.stdout) :
   """
   Build and refine alternate conformations, then validate the result.

   All arguments are stored as attributes of the same name.  When params is
   None the alt_confs scope extracted from master_phil is used.  For each
   macro cycle, residue conformers are built; if any were found the model
   is refined, identical conformers are re-joined and the model is refined
   again.  Afterwards a MolProbity validation summary, a model analysis and
   the starting and final R-factors are written to the output stream.

   Raises Sorry when the first macro cycle finds no alternate
   conformations.
   """
   adopt_init_args(self, locals())
   if (self.params is None) :
     self.params = master_phil.extract().alt_confs
   self.extract_selection()
   self.refine_cycle = 1
   self.map_file = None
   self.r_work_start = fmodel.r_work()
   self.r_free_start = fmodel.r_free()
   t_start = time.time()
   # Read the cycle count from self.params: the params argument is still
   # None when the defaults were installed above.
   for i_cycle in range(self.params.macro_cycles) :
     n_alts = self.build_residue_conformers(stop_if_none=(i_cycle==0))
     if (n_alts == 0) :
       if (i_cycle == 0) :
         raise Sorry("No alternate conformations found.")
     else :
       self.refine(constrain_occupancies=False)
       # NOTE(review): the original computed a "refine_again" flag from the
       # constrain_correlated_occupancies setting and the rejoin() result but
       # never consulted it, so the final refinement always runs.  That
       # behaviour is kept; confirm whether it was meant to be conditional.
       self.rejoin()
       self.refine(title="Refining final model")
   make_header("Finished", out=out)
   from mmtbx.validation import molprobity
   validation = molprobity.molprobity(
     pdb_hierarchy=self.pdb_hierarchy,
     outliers_only=False)
   print >> self.out, ""
   validation.show_summary(out=self.out, prefix="  ")
   make_sub_header("Analyzing final model", out=out)
   analyze_model.process_pdb_hierarchy(
     pdb_hierarchy=self.pdb_hierarchy,
     validation=validation,
     log=self.out).show(out=out, verbose=self.verbose)
   print >> self.out, ""
   print >> self.out, "Start:  r_work=%6.4f  r_free=%6.4f" % \
     (self.r_work_start, self.r_free_start)
   print >> self.out, "Final:  r_work=%6.4f  r_free=%6.4f" % \
     (self.fmodel.r_work(), self.fmodel.r_free())
   t_end = time.time()
   print >> self.out, ""
   print >> self.out, "Total runtime: %d s" % int(t_end - t_start)
   print >> self.out, ""
def run(args=(), params=None, out=None):
    """Compute and show ADP (B-factor) statistics for a model.

    When ``params`` is None the parameters are extracted from ``args`` with
    ``iotbx.phil.process_command_line_with_files``.  The model file and any
    restraint files are interpreted, an ``mmtbx.model.manager`` is built,
    an optional atom selection is applied, and the statistics are printed
    to ``out`` (``sys.stdout`` when None).

    Returns the statistics object.  Raises ``Sorry`` when the selection
    matches no atoms.
    """
    out = sys.stdout if (out is None) else out
    if (params is None):
        import iotbx.phil
        usage = """\
phenix.b_factor_statistics model.pdb [restraints.cif] [selection=...]

Show statistics for atomic displacement parameters (ADPs) or B-factors,
including TLS contribution if present."""
        cmdline = iotbx.phil.process_command_line_with_files(
            args=args,
            master_phil_string=master_phil,
            pdb_file_def="adp_statistics.pdb_file",
            cif_file_def="adp_statistics.cif_file",
            usage_string=usage)
        params = cmdline.work.extract()
    validate_params(params)
    import mmtbx.model
    import mmtbx.restraints
    from mmtbx.monomer_library import pdb_interpretation
    adp_params = params.adp_statistics
    input_files = [adp_params.pdb_file] + adp_params.cif_file
    processed_pdb_file = pdb_interpretation.run(
        args=input_files,
        substitute_non_crystallographic_unit_cell_if_necessary=True,
        log=out)
    geometry = processed_pdb_file.geometry_restraints_manager(
        show_energies=True)
    restraints_manager = mmtbx.restraints.manager(geometry=geometry,
                                                  normalization=True)
    model = mmtbx.model.manager(
        xray_structure=processed_pdb_file.xray_structure(),
        pdb_hierarchy=processed_pdb_file.all_chain_proxies.pdb_hierarchy,
        restraints_manager=restraints_manager,
        log=out)
    make_sub_header("Analyzing model B-factors", out=out)
    selection_string = adp_params.selection
    if (selection_string is not None):
        sel_cache = model.pdb_hierarchy().atom_selection_cache()
        selection = sel_cache.selection(selection_string)
        n_sel = selection.count(True)
        if (n_sel == 0):
            raise Sorry("No atoms in selection!")
        model = model.select(selection)
        print >> out, "Extracted %d atoms in selection:" % n_sel
        print >> out, "  %s" % adp_params.selection
        print >> out, ""
    stats = model.adp_statistics()
    stats.file_name = adp_params.pdb_file
    stats.selection = adp_params.selection
    stats.show_1(out=out)
    return stats
def run (args=(), params=None, out=None) :
  """
  Compute and show ADP (B-factor) statistics for a model.

  When params is None the parameters are extracted from args with
  iotbx.phil.process_command_line_with_files.  The model file and any
  restraint files are interpreted, an mmtbx.model.manager is built, an
  optional atom selection is applied, and the statistics are printed to
  out (sys.stdout when None).

  Returns the statistics object.  Raises Sorry when the selection matches
  no atoms.
  """
  out = sys.stdout if (out is None) else out
  if (params is None) :
    import iotbx.phil
    usage = """\
phenix.b_factor_statistics model.pdb [restraints.cif] [selection=...]

Show statistics for atomic displacement parameters (ADPs) or B-factors,
including TLS contribution if present."""
    cmdline = iotbx.phil.process_command_line_with_files(
      args=args,
      master_phil_string=master_phil,
      pdb_file_def="adp_statistics.pdb_file",
      cif_file_def="adp_statistics.cif_file",
      usage_string=usage)
    params = cmdline.work.extract()
  validate_params(params)
  import mmtbx.model
  import mmtbx.restraints
  from mmtbx.monomer_library import pdb_interpretation
  adp_params = params.adp_statistics
  input_files = [adp_params.pdb_file] + adp_params.cif_file
  processed_pdb_file = pdb_interpretation.run(
    args=input_files,
    substitute_non_crystallographic_unit_cell_if_necessary=True,
    log=out)
  geometry = processed_pdb_file.geometry_restraints_manager(show_energies=True)
  restraints_manager = mmtbx.restraints.manager(
    geometry = geometry,
    normalization = True)
  model = mmtbx.model.manager(
    xray_structure     = processed_pdb_file.xray_structure(),
    pdb_hierarchy      = processed_pdb_file.all_chain_proxies.pdb_hierarchy,
    restraints_manager = restraints_manager,
    log                = out)
  make_sub_header("Analyzing model B-factors", out=out)
  selection_string = adp_params.selection
  if (selection_string is not None) :
    sel_cache = model.pdb_hierarchy().atom_selection_cache()
    selection = sel_cache.selection(selection_string)
    n_sel = selection.count(True)
    if (n_sel == 0) :
      raise Sorry("No atoms in selection!")
    model = model.select(selection)
    print >> out, "Extracted %d atoms in selection:" % n_sel
    print >> out, "  %s" % adp_params.selection
    print >> out, ""
  stats = model.adp_statistics()
  stats.file_name = adp_params.pdb_file
  stats.selection = adp_params.selection
  stats.show_1(out=out)
  return stats
Exemple #25
0
  def print_atoms_occ_lt_1(self, hd_atoms_with_occ_0, single_hd_atoms_occ_lt_1,
                           prefix='', log=None):
    """Print H/D atoms with zero occupancy and with occupancy below one.

    Each list gets its own sub-header and is skipped entirely when empty.
    Only element ``[0]`` of a zero-occupancy entry is printed; for the
    second list ``[0]`` and ``[1]`` (the occupancy) are printed.  ``log``
    falls back to ``self.log`` when None.
    """
    target = self.log if (log is None) else log

    if hd_atoms_with_occ_0:
      make_sub_header('H (or D) atoms with zero occupancy', out=target)
      for entry in hd_atoms_with_occ_0:
        print('%s%s' % (prefix, entry[0]), file=target)
    if single_hd_atoms_occ_lt_1:
      make_sub_header('H (or D) atoms with occupancy < 1', out=target)
      for entry in single_hd_atoms_occ_lt_1:
        message = '%s%s with occupancy %s' % (prefix, entry[0], entry[1])
        print(message, file=target)
Exemple #26
0
 def check_work_root_folder(self):
     '''
     Check that the current directory is self.work_root_folder.

     Prints both absolute paths and then either 'OK' or
     'Wrong folder location'; in the latter case self.error is set to True.
     '''
     make_sub_header('Check location')
     current_dir = os.path.abspath(".")
     expected_dir = os.path.abspath(self.work_root_folder)
     print('Current folder: ', current_dir)
     print('Folder where script should be executed: ', expected_dir)
     if current_dir == expected_dir:
         print('OK')
     else:
         print('Wrong folder location')
         self.error = True
 def show_ligand_occupancies(self):
     """Print the occupancy of every ligand (and altloc) to ``self.log``.

     A single value is printed when the minimum and maximum occupancy are
     equal; otherwise the minimum, maximum and mean are printed.
     """
     make_sub_header(' Occupancies ', out=self.log)
     print('If three values: min, max, mean, otherwise the same occupancy for entire ligand.', \
       file=self.log)
     for id_tuple, ligand_dict in self.items():
         for altloc, lr in ligand_dict.items():
             occs = lr.get_occupancies()
             label = lr.id_str.ljust(16)
             if (occs.occ_min == occs.occ_max):
                 print(label, occs.occ_min, file=self.log)
             else:
                 print(label,
                       '%s   %s   %s' %
                       (occs.occ_min, occs.occ_max, occs.occ_mean),
                       file=self.log)
Exemple #28
0
  def print_outliers_bonds_angles(self, outliers_bonds, outliers_angles,
                                  prefix='', log=None):
    """Print bond and angle outliers, each list under its own sub-header.

    Entries are indexed by position: ``[0]`` an identifier, ``[1]`` the bond
    or angle description, ``[2]`` the observed value and ``[3]`` the delta
    from target.  Empty lists are skipped.  ``log`` falls back to
    ``self.log`` when None.
    """
    if (log is None):
      log = self.log

    if outliers_bonds:
      make_sub_header('Bond outliers', out=log)
      for item in outliers_bonds:
        print('%s%s, Bond %s, observed: %.3f, delta from target: %.3f' % \
          (prefix, item[0], item[1], item[2], item[3]), file=log)
    if outliers_angles:
      make_sub_header('Angle outliers', out=log)
      for item in outliers_angles:
        # Write to the resolved log (not self.log) so a caller-supplied
        # stream receives the angle rows as well as their header.
        print('%s%s, Angle %s, observed: %.3f, delta from target: %.3f' % \
          (prefix, item[0], item[1], item[2], item[3]), file=log)
Exemple #29
0
def run(args, out=sys.stdout):
    """
    Load a model and data from the command line and run the water analysis.

    Parameters
    ----------
    args : list of str
        Command-line arguments handed to load_model_and_data.
    out : file, optional
        Stream for the loader output and the report.

    Returns
    -------
    The object returned by mmtbx.validation.waters.waters.
    """
    from mmtbx.validation import waters
    import mmtbx.command_line
    phil_scope = mmtbx.command_line.generate_master_phil_with_inputs("")
    loaded = mmtbx.command_line.load_model_and_data(
        args=args,
        master_phil=phil_scope,
        process_pdb_file=False,
        out=out)
    analysis = waters.waters(
        pdb_hierarchy=loaded.pdb_hierarchy,
        xray_structure=loaded.xray_structure,
        fmodel=loaded.fmodel,
        collect_all=True)
    make_sub_header("Solvent analysis", out=out)
    analysis.show(out=out)
    return analysis
  def __init__(self, logger, folder, params):
    """
    Store the inputs, derive the codes from the folder name and set up.

    The last component of ``folder`` becomes ``self.prefix``; its first four
    characters are stored as ``self.pdb_code`` and everything from index 5
    onwards as ``self.map_code``.  A header is written to ``logger`` before
    prepare_directory() and initialize_json() are called.
    """
    self.logger, self.folder, self.params = logger, folder, params
    folder_name = os.path.basename(os.path.normpath(folder))
    self.prefix = folder_name
    self.pdb_code = folder_name[0:4]
    self.map_code = folder_name[5:]

    self.success = True

    title = 'Model: %s (emdb %s)' % (self.pdb_code, self.map_code)
    make_header(title, out=self.logger)
    make_sub_header('Initializing', out=self.logger)
    self.prepare_directory()
    self.initialize_json()
Exemple #31
0
 def show_ccs(self):
   """
   Print correlation-coefficient results for every ligand conformer.

   Does nothing when ``self.fmodel`` is None.  Otherwise one line per
   conformer is written to ``self.log`` holding, rounded to two decimals,
   cc_two_fofc, cc_fofc, fofc_min, fofc_max and fofc_mean.
   """
   if self.fmodel is None:
     return
   make_sub_header(' Correlation coefficients ', out=self.log)
   for _id_tuple, conformers in self.items():
     for _altloc, ligand in conformers.items():
       ccs = ligand.get_ccs()
       columns = [
         round(ccs.cc_two_fofc, 2),
         round(ccs.cc_fofc, 2),
         round(ccs.fofc_min, 2),
         round(ccs.fofc_max, 2),
         round(ccs.fofc_mean, 2),
       ]
       print(ligand.id_str.ljust(16), *columns, file=self.log)
Exemple #32
0
    def show(self, log=null_out()):
        """
        Print a summary and a table of all hydrogen bonds to `log`.

        Each key of ``self._hbonds_dict`` is a triple of atom indices
        (X, H, A) into ``self.model.get_atoms()``.  Each record supplies
        record[0], record[1] and record[2] for the 'H...A', 'X...A' and
        'X-H...A' columns and record[4] for the 'symop' column (blank when
        it is None).  When there are no hydrogen bonds a single message is
        printed instead of the table.
        """
        make_sub_header(' Hydrogen bonds', out=log)
        if not self._hbonds_dict:
            print('No hbonds found', file=log)
            return
        # General information
        results = self.get_results()
        result_str = '{:<18} : {:5d}'
        print(result_str.format(' Number of H bonds', results.n_hbonds),
              file=log)
        # Two-row table header: column groups first, then the columns.
        title1 = ['donor', 'acceptor', 'distance', 'angle']
        title1_str = '{:^33}|{:^16}|{:^21}|{:^14}|'
        print('\n' + title1_str.format(*title1), file=log)

        title2 = ['X', 'H', 'A', 'H...A', 'X...A', 'X-H...A', 'symop']
        title2_str = '{:^16}|{:^16}|{:^16}|{:^10}|{:^10}|{:^14}|{:^15}|'
        print(title2_str.format(*title2), file=log)
        table_str = '{:>16}|{:>16}|{:^16}|{:^10.2f}|{:^10.2f}|{:^14.2f}|{:^15}|'
        print('-' * 99, file=log)
        atoms = self.model.get_atoms()
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        for iseq_tuple, record in self._hbonds_dict.items():
            iseq_x, iseq_h, iseq_a = iseq_tuple
            symop = record[4] if record[4] is not None else ''
            # Strip the 'pdb=' tag and the quotes from the atom labels.
            x_id_str, h_id_str, a_id_str = [
                atoms[i_seq].id_str().replace('pdb=', '').replace('"', '')
                for i_seq in (iseq_x, iseq_h, iseq_a)
            ]
            line = [
                x_id_str, h_id_str, a_id_str,
                round(record[0], 2),
                round(record[1], 2),
                round(record[2], 2), symop
            ]
            print(table_str.format(*line), file=log)
        print('-' * 99, file=log)
Exemple #33
0
 def show_adps(self):
   """
   Print ADP statistics of each ligand conformer and of its neighbors.

   For every conformer a line with min, max and mean B (one decimal),
   n_iso and n_aniso is written to ``self.log``.  A second 'neighbors'
   line follows only when ``b_mean_within`` is available.
   """
   make_sub_header(' ADPs ', out=self.log)
   indent = ' ' * 18
   print(indent, "min   max    mean   n_iso   n_aniso", file=self.log)
   for _id_tuple, conformers in self.items():
     for _altloc, ligand in conformers.items():
       adps = ligand.get_adps()
       label = ligand.id_str.ljust(14)
       ligand_row = '%7s%7s%7s%7s%7s' % (
         round(adps.b_min, 1), round(adps.b_max, 1), round(adps.b_mean, 1),
         adps.n_iso, adps.n_aniso)
       print(label, ligand_row, file=self.log)
       if (adps.b_mean_within is None):
         continue
       neighbor_row = '%7s%7s%7s' % (
         round(adps.b_min_within, 1), round(adps.b_max_within, 1),
         round(adps.b_mean_within, 1))
       print('neighbors'.ljust(14), neighbor_row, file=self.log)
Exemple #34
0
 def get_folders_sorted_by_size(self):
     """
     List the entries below ``emdb`` that contain a map file, ordered by the
     size of that file as given by flex.sort_permutation.

     Only the first entry returned by os.listdir() for each ``<entry>/map/``
     folder is considered.  Entries without a ``map/`` folder, with an empty
     one, or whose first entry is not a regular file are skipped.

     Returns
     -------
     list of str, or None when ``self.error`` is set.
     """
     if self.error: return
     make_sub_header('Get map folders')
     folders = []
     size = flex.double()
     for d in os.listdir(emdb):
         dm = emdb + d + "/map/"
         if (not os.path.isdir(dm)): continue
         entries = os.listdir(dm)
         # An empty map folder used to raise IndexError on entries[0].
         if (not entries): continue
         map_file = dm + entries[0]
         if (os.path.isfile(map_file)):
             folders.append(d)
             size.append(os.path.getsize(map_file))
     tmp = [folders[i] for i in flex.sort_permutation(size)]
     print('Number of folders with map files: ', len(tmp))
     return tmp
def run(args, out=sys.stdout, verbose=True):
  """
  Command-line driver: optionally correct the model sequence, then extend
  and refine sidechains.

  Parameters
  ----------
  args : list of str
  out : file, optional
  verbose : bool, optional
      When False, the output of the input-loading step is discarded.

  Returns
  -------
  Whatever mmtbx.building.extend_sidechains.extend_and_refine returns.
  """
  import mmtbx.building.extend_sidechains
  import mmtbx.command_line
  loader_out = out if verbose else null_out()
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=get_master_phil(),
    process_pdb_file=False,
    out=loader_out,
    usage_string="""\
mmtbx.extend_sidechains model.pdb data.mtz [restraints.cif] [options]

Rebuild sidechains with missing non-hydrogen atoms.  Includes real-space
refinement (but needs work).""")
  params = cmdline.params
  model_path = params.input.pdb.file_name[0]
  prefix = os.path.splitext(os.path.basename(model_path))[0]
  hierarchy = cmdline.pdb_hierarchy
  xrs = cmdline.xray_structure
  if (cmdline.params.input.sequence is not None):
    from iotbx.bioinformatics import any_sequence_format
    sequences, nc = any_sequence_format(cmdline.params.input.sequence)
    make_sub_header("Correcting model sequence", out=out)
    n_changed = mmtbx.building.extend_sidechains.correct_sequence(
      pdb_hierarchy=hierarchy,
      sequences=sequences,
      out=out)
    if (n_changed != 0):
      # The hierarchy was edited, so the scatterers and fmodel must follow.
      xrs = hierarchy.extract_xray_structure(
        crystal_symmetry=xrs.crystal_symmetry())
      cmdline.fmodel.update_xray_structure(xrs, update_f_calc=True)
    else:
      print >> out, "  No modifications required."
  restraint_objects = [cif_obj for _name, cif_obj in cmdline.cif_objects]
  return mmtbx.building.extend_sidechains.extend_and_refine(
    pdb_hierarchy=hierarchy,
    xray_structure=xrs,
    fmodel=cmdline.fmodel,
    params=params,
    prefix=prefix,
    cif_objects=restraint_objects,
    out=out,
    verbose=verbose,
    output_model=params.output_model,
    output_map_coeffs=params.output_map_coeffs)
  def show (self, out=None, header=True) :
    if (out is None) : out = sys.stdout
    if (header) :
      make_sub_header("Merging statistics", out=out)
    self.overall.show_summary(out)
    print >> out, ""
    print >> out, "Redundancies%s:" % self.anom_extra
    n_obs = sorted(self.overall.redundancies.keys())
    for x in n_obs :
      print >> out, "  %d : %d" % (x, self.overall.redundancies[x])
    print >> out, ""
    print >> out, """\
  Statistics by resolution bin:
 d_max  d_min   #obs  #uniq   mult.  %comp       <I>  <I/sI>  r_mrg r_meas  r_pim  cc1/2  cc_ano"""
    for bin_stats in self.bins :
      print >> out, bin_stats.format()
    print >> out, self.overall.format()
 def print_adps(self):
     """
     Print ADP statistics of each ligand conformer and of its neighbors.

     For every conformer a line with min, max and mean B (one decimal),
     n_iso and n_aniso is written to ``self.log``.  The 'neighbors' line is
     printed only when ``b_mean_within`` is not None, matching show_adps;
     rounding None would raise TypeError.
     """
     make_sub_header(' ADPs ', out=self.log)
     pad1 = ' ' * 20
     print(pad1, "min   max    mean   n_iso   n_aniso", file=self.log)
     for id_tuple, ligand_dict in self.items():
         for altloc, lr in ligand_dict.items():
             adps = lr.get_adps()
             print(lr.id_str.ljust(14),
                   '%7s%7s%7s%7s%7s' %
                   (round(adps.b_min, 1), round(adps.b_max, 1),
                    round(adps.b_mean, 1), adps.n_iso, adps.n_aniso),
                   file=self.log)
             # No neighbor statistics available: skip the second line.
             if (adps.b_mean_within is None):
                 continue
             print('neighbors'.ljust(14),
                   '%7s%7s%7s' %
                   (round(adps.b_min_within, 1), round(
                       adps.b_max_within, 1), round(adps.b_mean_within, 1)),
                   file=self.log)
def run(args, out=sys.stdout):
  """
  Load a model and data from the command line and run the water analysis.

  Parameters
  ----------
  args : list of str
  out : file, optional

  Returns
  -------
  The object returned by mmtbx.validation.waters.waters.
  """
  from mmtbx.validation import waters
  import mmtbx.command_line
  phil_scope = mmtbx.command_line.generate_master_phil_with_inputs("")
  loaded = mmtbx.command_line.load_model_and_data(
    args=args, master_phil=phil_scope, process_pdb_file=False, out=out)
  analysis = waters.waters(
    pdb_hierarchy=loaded.pdb_hierarchy,
    xray_structure=loaded.xray_structure,
    fmodel=loaded.fmodel,
    collect_all=True)
  make_sub_header("Solvent analysis", out=out)
  analysis.show(out=out)
  return analysis
def run(args, out=sys.stdout):
  """
  Command-line driver for the static disorder analysis.

  Loads the model (data are optional), runs MolProbity validation, analyzes
  the hierarchy for alternate conformations and prints both reports.  When
  params.pickle is set, the analysis object is dumped to
  "<model base name>.pkl" in the current directory.

  Parameters
  ----------
  args : list of str
  out : file, optional

  Returns
  -------
  The object returned by analyze_model.process_pdb_hierarchy.
  """
  from mmtbx.disorder import analyze_model
  import mmtbx.validation.molprobity
  import mmtbx.command_line
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    require_data=False,
    create_fmodel=True,
    process_pdb_file=True,
    usage_string="mmtbx.analyze_static_disorder model.pdb",
    out=out)
  pdb_hierarchy = cmdline.pdb_hierarchy
  params = cmdline.params
  molprobity_result = mmtbx.validation.molprobity.molprobity(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=cmdline.xray_structure,
    fmodel=cmdline.fmodel,
    crystal_symmetry=cmdline.crystal_symmetry,
    geometry_restraints_manager=cmdline.geometry,
    header_info=None,
    keep_hydrogens=False,
    outliers_only=False,
    nuclear=False)
  make_header("Analyzing model", out=out)
  if (params.ignore_inconsistent_occupancy):
    print >> out, "Discontinuous occupancies will be ignored."
  process = analyze_model.process_pdb_hierarchy(
    pdb_hierarchy=pdb_hierarchy,
    validation=molprobity_result,
    ignore_inconsistent_occupancy=params.ignore_inconsistent_occupancy,
    log=out)
  make_sub_header("MolProbity validation", out=out)
  molprobity_result.show_summary(out=out)
  make_sub_header("Disorder analysis", out=out)
  if (process.n_disordered != 0):
    process.show(out=out, verbose=params.verbose)
  else:
    print >> out, "No alternate conformations found."
  if (params.pickle):
    model_stem = os.path.splitext(params.input.pdb.file_name[0])[0]
    easy_pickle.dump(os.path.basename(model_stem) + ".pkl", process)
  return process
Exemple #40
0
def run(args, out=sys.stdout):
    """
    Command-line driver for the static disorder analysis.

    Loads the model (data are optional), runs MolProbity validation,
    analyzes the hierarchy for alternate conformations and prints both
    reports.  When params.pickle is set, the analysis object is dumped to
    "<model base name>.pkl" in the current directory.

    Returns
    -------
    The object returned by analyze_model.process_pdb_hierarchy.
    """
    from mmtbx.disorder import analyze_model
    import mmtbx.validation.molprobity
    import mmtbx.command_line
    cmdline = mmtbx.command_line.load_model_and_data(
        args=args,
        master_phil=master_phil(),
        require_data=False,
        create_fmodel=True,
        process_pdb_file=True,
        usage_string="mmtbx.analyze_static_disorder model.pdb",
        out=out)
    pdb_hierarchy = cmdline.pdb_hierarchy
    params = cmdline.params
    molprobity_result = mmtbx.validation.molprobity.molprobity(
        pdb_hierarchy=pdb_hierarchy,
        xray_structure=cmdline.xray_structure,
        fmodel=cmdline.fmodel,
        crystal_symmetry=cmdline.crystal_symmetry,
        geometry_restraints_manager=cmdline.geometry,
        header_info=None,
        keep_hydrogens=False,
        outliers_only=False,
        nuclear=False)
    make_header("Analyzing model", out=out)
    if (params.ignore_inconsistent_occupancy):
        print("Discontinuous occupancies will be ignored.", file=out)
    process = analyze_model.process_pdb_hierarchy(
        pdb_hierarchy=pdb_hierarchy,
        validation=molprobity_result,
        ignore_inconsistent_occupancy=params.ignore_inconsistent_occupancy,
        log=out)
    make_sub_header("MolProbity validation", out=out)
    molprobity_result.show_summary(out=out)
    make_sub_header("Disorder analysis", out=out)
    if (process.n_disordered != 0):
        process.show(out=out, verbose=params.verbose)
    else:
        print("No alternate conformations found.", file=out)
    if (params.pickle):
        model_stem = os.path.splitext(params.input.pdb.file_name[0])[0]
        easy_pickle.dump(os.path.basename(model_stem) + ".pkl", process)
    return process
Exemple #41
0
  def analyze_waters(self, out=sys.stdout, debug=True, candidates=Auto):
    """
    Run analyze_water on every water site and report those for which a
    final choice was made.

    Parameters
    ----------
    out : file, optional
    debug : bool, optional
        When True, every prediction is shown in full before filtering.
    candidates : list of str, optional

    Returns
    -------
    list
        The predictions whose final_choice is not None.  Nothing (None) is
        returned when the model has no waters.
    """
    water_i_seqs = self._extract_waters()
    print >> out, "  %d waters to analyze" % len(water_i_seqs)
    print >> out, ""
    if (len(water_i_seqs) == 0):
      return
    # First pass: collect every prediction before anything is displayed.
    predictions = []
    for i_seq in water_i_seqs:
      outcome = self.analyze_water(
        i_seq=i_seq,
        debug=debug,
        candidates=candidates,
        filter_outputs=self.params.svm.filtered_outputs)
      if (outcome is None):
        continue
      predictions.append(outcome)
    # Second pass: optional verbose display, keep those with a final choice.
    accepted = []
    for outcome in predictions:
      if (debug):
        outcome.show(out=out, prefix="  ")
        print >> out, ""
      if (outcome.final_choice is not None):
        accepted.append(outcome)
    if (len(accepted) != 0):
      make_sub_header("Predicted ions", out=out)
      for outcome in accepted:
        outcome.show_brief(out=out, prefix="  ")
    else:
      print >> out, ""
      print >> out, "  No waters could be classified as possible ions."
    return accepted
 def build_residue_conformers(self, stop_if_none=False):
   """
   Run one single_residue.build_cycle over the current selection and
   replace self.pdb_hierarchy with the result.

   Parameters
   ----------
   stop_if_none : bool, optional
       Raise Sorry when the cycle produced no alternate conformations.

   Returns
   -------
   The number of alternates reported by build_cycle.
   """
   self.extract_selection()
   print >> self.out, ""
   make_sub_header("Fitting individual residues", out=self.out)
   settings = self.params
   new_hierarchy, n_alternates = single_residue.build_cycle(
     pdb_hierarchy=self.pdb_hierarchy,
     fmodel=self.fmodel,
     geometry_restraints_manager=self.geometry_restraints_manager,
     params=settings,
     cif_objects=self.cif_objects,
     selection=settings.selection,
     nproc=settings.nproc,
     verbose=self.verbose,
     debug=self.debug,
     out=self.out)
   self.pdb_hierarchy = new_hierarchy
   if (stop_if_none) and (n_alternates == 0):
     raise Sorry("No new conformations generated.")
   return n_alternates
 def rejoin(self):
   """
   Re-join split conformers on a copy of the hierarchy and adopt the copy
   only if something was modified.

   When residues were modified, fmodel is updated with the new scatterers
   and self.map_file is reset to None.  finalize_model is always run on
   the current hierarchy afterwards.

   Returns
   -------
   bool
       True when at least one modification was made.
   """
   make_sub_header("Re-joining identical conformers", out=self.out)
   working_copy = self.pdb_hierarchy.deep_copy()
   n_modified = alternate_conformations.rejoin_split_single_conformers(
     pdb_hierarchy=working_copy,
     crystal_symmetry=self.fmodel.xray_structure,
     model_error_ml=self.fmodel.model_error_ml(),
     params=self.params.merging,
     reset_occupancies=self.params.refinement.constrain_correlated_occupancies,
     verbose=self.verbose,
     log=self.out)
   changed = (n_modified > 0)
   if changed:
     self.pdb_hierarchy = working_copy
     updated_xrs = self.pdb_hierarchy.extract_xray_structure(
       crystal_symmetry=self.fmodel.xray_structure)
     self.fmodel.update_xray_structure(updated_xrs)
     self.map_file = None
   final_xrs = self.pdb_hierarchy.extract_xray_structure(
     crystal_symmetry=self.fmodel.xray_structure)
   alternate_conformations.finalize_model(
     pdb_hierarchy=self.pdb_hierarchy,
     xray_structure=final_xrs,
     set_b_iso=None,
     convert_to_isotropic=False)
   return changed
def run(args, out=sys.stdout):
  """
  Command-line driver for mmtbx.validate_ligands.

  Any three-character alphanumeric argument that is not an existing path is
  rewritten as "ligand_code=<arg>" before the arguments are parsed.  Each
  requested ligand code is then validated and its results printed to `out`.

  Raises
  ------
  Sorry
      When no ligand code was given, or when no residue with a requested
      code is found.
  """
  usage_string = """\
mmtbx.validate_ligands model.pdb data.mtz LIGAND_CODE [...]

Print out basic statistics for residue(s) with the given code(s), including
electron density values/CC.
"""
  import mmtbx.validation.ligands
  import mmtbx.command_line
  args_ = [
    ("ligand_code=%s" % arg)
    if ((len(arg) == 3) and arg.isalnum() and (not os.path.exists(arg)))
    else arg
    for arg in args
  ]
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args_,
    master_phil=master_phil(),
    process_pdb_file=False,
    usage_string=usage_string)
  params = cmdline.params
  if (params.ligand_code is None) or (len(params.ligand_code) == 0):
    raise Sorry("Ligand code required!")
  make_sub_header("Validating ligands", out=out)
  ligands = mmtbx.validation.ligands
  for code in params.ligand_code:
    validations = ligands.validate_ligands(
      pdb_hierarchy=cmdline.pdb_hierarchy,
      fmodel=cmdline.fmodel,
      ligand_code=code,
      reference_structure=params.reference_structure,
      only_segid=params.only_segid)
    if (validations is None):
      raise Sorry("No ligands named '%s' found." % code)
    ligands.show_validation_results(
      validations=validations,
      out=out,
      verbose=params.verbose)
def make_sub_header(text, out=None):
  """Write `text` as an 80-column sub-header to `out` (default sys.stdout)."""
  stream = sys.stdout if (out is None) else out
  str_utils.make_sub_header(text, out=stream, header_len=80)
 def __init__ (self,
     model_names,
     model_data,
     f_obs,
     r_free_flags,
     params=None,
     skip_twin_detection=False,
     nproc=1,
     log=sys.stdout) :
   """
   Screen candidate models against the experimental data and remember the
   one with the lowest R-free.

   Models whose symmetry is missing, whose space group is incompatible with
   the data (neither identical nor sharing the derived point group), or
   whose unit cell is too different are reported and skipped.  The
   remaining models are passed to self.evaluate_model; results with
   r_free <= params.max_r_free are kept, sorted by r_free, and the best one
   is stored in the best_* attributes.  self.show() is called at the end.

   Parameters
   ----------
   model_names : list of str
       Model file names; read from disk when model_data is None, otherwise
       used for reporting only.
   model_data : list of (pdb_hierarchy, xray_structure) or None
   f_obs : experimental amplitudes (cut at params.d_min)
   r_free_flags : test-set flags (reduced to the set common with f_obs)
   params : optional, defaults to master_phil.extract()
   skip_twin_detection : bool, optional
   nproc : int, optional
       Number of processes handed to easy_mp.parallel_map.
   log : file, optional

   Raises
   ------
   RuntimeError
       When model_data is None and an entry of model_names is not a file.
   """
   if (params is None) :
     params = master_phil.extract()
   self.model_names = model_names
   if (model_data is None) :
     from iotbx.file_reader import any_file
     model_data = []
     for file_name in model_names :
       if (not os.path.isfile(file_name)) :
         raise RuntimeError("model_data is None, but %s is not a file." %
           file_name)
       model_in = any_file(file_name,
         force_type="pdb",
         raise_sorry_if_errors=True).file_object
       pdb_hierarchy = model_in.hierarchy
       xray_structure = model_in.xray_structure_simple()
       model_data.append((pdb_hierarchy, xray_structure))
   self.model_symmetries = []
   self.models_accepted = []
   self.model_r_frees = []
   self.f_obs = f_obs.resolution_filter(d_min=params.d_min)
   self.r_free_flags = r_free_flags.common_set(other=self.f_obs)
   self.skip_twin_detection = skip_twin_detection
   self.params = params
   self.evaluations = None
   self.best_xray_structure = None
   self.best_pdb_hierarchy = None
   self.best_result = None
   self.best_model_name = None
   from mmtbx.pdb_symmetry import rms_difference
   from iotbx import file_reader
   data_symmetry = f_obs.crystal_symmetry()
   data_space_group = data_symmetry.space_group()
   data_point_group = data_space_group.build_derived_point_group()
   data_unit_cell = data_symmetry.unit_cell()
   data_cell_edges = data_unit_cell.parameters()[0:3]
   data_cell_angles = data_unit_cell.parameters()[3:6]
   make_sub_header("Evaluating models", out=log)
   print >> log, "Experimental data:"
   print >> log, "  space group:  %s" % data_space_group.info()
   print >> log, "  unit cell:    %s" % ucf(data_unit_cell)
   pdb_hierarchies = []
   # One entry per model; None marks a model rejected on symmetry grounds so
   # that indices stay aligned with model_names and pdb_hierarchies.
   xray_structures = []
   for k, file_name in enumerate(model_names) :
     pdb_hierarchy, xray_structure = model_data[k]
     pdb_hierarchy.atoms().reset_i_seq()
     pdb_hierarchies.append(pdb_hierarchy)
     model_symmetry = xray_structure.crystal_symmetry()
     self.model_symmetries.append(model_symmetry)
     if (model_symmetry is None) :
       print >> log, "Model %d is missing symmetry records:" % (k+1)
       print >> log, "  source:  %s" % file_name
       xray_structures.append(None)
       continue
     model_unit_cell = model_symmetry.unit_cell()
     model_space_group = model_symmetry.space_group()
     # Accept an identical space group or one with the same point group.
     is_compatible_sg = False
     if (model_space_group == data_space_group) :
       is_compatible_sg = True
     else :
       model_point_group = model_space_group.build_derived_point_group()
       if (data_point_group == model_point_group) :
         is_compatible_sg = True
     if (not is_compatible_sg) :
       print >> log, "Model %d has incompatible space group:" % (k+1)
       print >> log, "  source:  %s" % file_name
       print >> log, "  space group: %s" % model_space_group.info()
       xray_structures.append(None)
       continue
     # Accept a similar cell, or one within the RMSD limits from params.
     is_similar_cell = False
     if (model_unit_cell.is_similar_to(data_unit_cell)) :
       is_similar_cell = True
     else :
       model_cell_edges = model_unit_cell.parameters()[0:3]
       model_cell_angles = model_unit_cell.parameters()[3:6]
       cell_edge_rmsd = rms_difference(model_cell_edges, data_cell_edges)
       cell_angle_rmsd = rms_difference(model_cell_angles, data_cell_angles)
       if ((cell_edge_rmsd <= params.max_cell_edge_rmsd) and
           (cell_angle_rmsd <= params.max_cell_angle_rmsd)) :
         is_similar_cell = True
     if (not is_similar_cell) :
       # This branch is about the unit cell; the message used to repeat the
       # space-group wording of the previous check.
       print >> log, "Model %d has incompatible unit cell:" % (k+1)
       print >> log, "  source: %s" % file_name
       print >> log, "  model:  %s" % ucf(model_unit_cell)
       xray_structures.append(None)
       continue
     else :
       xray_structures.append(xray_structure)
   if (xray_structures.count(None) != len(xray_structures)) :
     print >> log, ""
     print >> log, "Calculating R-factors - will use %s processors." % nproc
     evaluations = easy_mp.parallel_map(
       func=self.evaluate_model,
       iterable=zip(xray_structures, pdb_hierarchies),
       processes=nproc)
     passed = []
     for k, result in enumerate(evaluations) :
       if (result is not None) :
         if (result.r_free <= params.max_r_free) :
           passed.append((k, result))
     if (len(passed) > 0) :
       # Stable ascending sort on R-free; same order as the former cmp sort.
       passed.sort(key=lambda item: item[1].r_free)
       i_result, result = passed[0]
       self.evaluations = passed
       self.best_xray_structure = result.xray_structure
       self.best_pdb_hierarchy = pdb_hierarchies[i_result]
       self.best_result = result
       self.best_model_name = self.model_names[i_result]
   self.show(out=log, verbose=True)
def strip_model (
    pdb_hierarchy=None,
    xray_structure=None,
    file_name=None,
    params=None,
    remove_waters=True,
    remove_hydrogens=True,
    remove_alt_confs=True,
    convert_semet_to_met=True,
    convert_to_isotropic=True,
    reset_occupancies=True,
    remove_ligands=False,
    reset_hetatm_flag=False,
    preserve_remarks=False,
    preserve_symmetry=True,
    add_remarks=None,
    output_file=None,
    log=None) :
  """
  Utility for removing extraneous records from a model intended for use in
  molecular replacement, etc., including waters, alternate conformations,
  and other features specific to a particular dataset.

  Parameters
  ----------
  pdb_hierarchy, xray_structure : optional
      In-memory model; copies are processed, the inputs are not modified.
      Must both be None when file_name is given.
  file_name : str, optional
      PDB file to read the model from.
  params : optional
      When given, its attributes override the remove_*, convert_*,
      reset_occupancies and reset_hetatm_flag keyword arguments.
  remove_waters, remove_hydrogens, remove_alt_confs, convert_semet_to_met,
  convert_to_isotropic, reset_occupancies, remove_ligands,
  reset_hetatm_flag : bool, optional
      Individual processing steps.  reset_occupancies requires
      remove_alt_confs.
  preserve_remarks : bool, optional
      Copy the REMARK section of the input file to the output file.
  preserve_symmetry : bool, optional
      Write the crystal symmetry to the output file.
  add_remarks : list of str, optional
      Extra lines written at the top of the output file.
  output_file : str, optional
      When given, the processed model is written to this file.
  log : file, optional
      Defaults to null_out().

  Returns
  -------
  (pdb_hierarchy, xray_structure) after processing.

  Raises
  ------
  Sorry
      When the hierarchy contains more than one model.
  """
  if (params is not None) :
    remove_waters = params.remove_waters
    remove_hydrogens = params.remove_hydrogens
    remove_alt_confs = params.remove_alt_confs
    convert_semet_to_met = params.convert_semet_to_met
    convert_to_isotropic = params.convert_to_isotropic
    reset_occupancies = params.reset_occupancies
    remove_ligands = params.remove_ligands
    reset_hetatm_flag = params.reset_hetatm_flag
  if (log is None) :
    log = null_out()
  make_sub_header("Processing input model", out=log)
  from mmtbx import pdbtools
  remarks = None
  if (file_name is not None) :
    print >> log, "Reading model from %s" % file_name
    assert ([pdb_hierarchy, xray_structure] == [None, None])
    from iotbx import file_reader
    pdb_in = file_reader.any_file(file_name, force_type="pdb",
      raise_sorry_if_errors=True)
    pdb_in.check_file_type("pdb")
    remarks = pdb_in.file_object.input.remark_section()
    pdb_hierarchy = pdb_in.file_object.hierarchy
    xray_structure = pdb_in.file_object.xray_structure_simple()
  else :
    # XXX work with copies, not the original structure
    pdb_hierarchy = pdb_hierarchy.deep_copy()
    xray_structure = xray_structure.deep_copy_scatterers()
  pdb_hierarchy.atoms().reset_i_seq()
  if (len(pdb_hierarchy.models()) > 1) :
    raise Sorry("Multiple models not supported.")
  if (remove_hydrogens) :
    sele = ~(xray_structure.hd_selection())
    n_hd = sele.count(False)
    if (n_hd > 0) :
      pdb_hierarchy = pdb_hierarchy.select(sele)
      xray_structure = xray_structure.select(sele)
      print >> log, "  removed %d hydrogens" % n_hd
      pdb_hierarchy.atoms().reset_i_seq()
  if (remove_waters) :
    sele = pdb_hierarchy.atom_selection_cache().selection("not (resname HOH)")
    n_wat = sele.count(False)
    if (n_wat > 0) :
      pdb_hierarchy = pdb_hierarchy.select(sele)
      xray_structure = xray_structure.select(sele)
      print >> log, "  removed %d waters" % n_wat
      pdb_hierarchy.atoms().reset_i_seq()
  # Once atoms are removed or renamed below, the hierarchy and the
  # scatterers no longer carry identical labels.
  assert_identical_id_str = True
  if (remove_alt_confs) :
    n_atoms_start = xray_structure.scatterers().size()
    pdbtools.remove_alt_confs(pdb_hierarchy)
    i_seqs = pdb_hierarchy.atoms().extract_i_seq()
    n_atoms_end = i_seqs.size()
    if (n_atoms_end != n_atoms_start) :
      # start - end, so that the reported number of removed atoms is
      # positive (it was printed with the opposite sign before).
      print >> log, "  removed %d atoms in alternate conformations" % \
        (n_atoms_start - n_atoms_end)
      assert_identical_id_str = False
    xray_structure = xray_structure.select(i_seqs)
    pdb_hierarchy.atoms().reset_i_seq()
  if (convert_semet_to_met) :
    # XXX need to start from a copy here because the atom-parent relationship
    # seems to be messed up otherwise.  this is probably a bug.
    pdb_hierarchy = pdb_hierarchy.deep_copy()
    n_mse = pdbtools.convert_semet_to_met(
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=xray_structure)
    if (n_mse > 0) :
      print >> log, "  removed %d selenomethionine (MSE) residues" % n_mse
      assert_identical_id_str = False
      # NOTE(review): writing tmp1.pdb into the current directory looks like
      # a debugging leftover - confirm whether it can be dropped.  The file
      # is at least closed deterministically now.
      with open("tmp1.pdb", "w") as tmp_out :
        tmp_out.write(pdb_hierarchy.as_pdb_string())
      sel = pdb_hierarchy.atom_selection_cache().selection
      assert sel("resname MSE").count(True) == 0
  if (convert_to_isotropic) :
    xray_structure.convert_to_isotropic()
    pdb_hierarchy.adopt_xray_structure(xray_structure,
      assert_identical_id_str=assert_identical_id_str)
    print >> log, "  converted all atoms to isotropic B-factors"
  if (reset_occupancies) :
    assert (remove_alt_confs)
    xray_structure.adjust_occupancy(occ_max=1.0, occ_min=1.0)
    pdb_hierarchy.adopt_xray_structure(xray_structure,
      assert_identical_id_str=assert_identical_id_str)
    print >> log, "  reset occupancy to 1.0 for all atoms"
  if (reset_hetatm_flag) :
    for atom in pdb_hierarchy.atoms() :
      atom.hetero = False
  if (remove_ligands) :
    pdb_hierarchy.atoms().reset_i_seq()
    model = pdb_hierarchy.only_model()
    # Every chain that is neither protein nor nucleic acid is dropped.
    for chain in model.chains() :
      if (not chain.is_protein()) and (not chain.is_na()) :
        print >> log, "  removing %d ligand atoms in chain '%s'" % \
          (len(chain.atoms()), chain.id)
        model.remove_chain(chain)
    i_seqs = pdb_hierarchy.atoms().extract_i_seq()
    xray_structure = xray_structure.select(i_seqs)
    pdb_hierarchy.atoms().reset_i_seq()
  assert xray_structure.scatterers().size() == pdb_hierarchy.atoms_size()
  if (output_file is not None) :
    symm = None
    if (preserve_symmetry) :
      symm = xray_structure
    # The with-block closes the file even if writing fails part-way.
    with open(output_file, "w") as f :
      if (add_remarks is not None) :
        f.write("\n".join(add_remarks))
        f.write("\n")
      if (preserve_remarks) and (remarks is not None) :
        f.write("\n".join(remarks))
        f.write("\n")
      f.write(pdb_hierarchy.as_pdb_string(crystal_symmetry=symm))
    print >> log, "  wrote model to %s" % output_file
  return pdb_hierarchy, xray_structure
def run(args, log=sys.stdout):
  """
  Command-line driver that reports model geometry statistics and several
  map-model agreement measures for one model and one CCP4 map.

  The command line must provide a resolution, exactly one PDB file, crystal
  symmetry and a CCP4 map; Sorry is raised when any of them is missing or
  when the PDB file holds more than one model.  Histograms of map values,
  bond/angle deviations, non-bonded distances and ADPs are printed,
  followed by the map-model FSC and correlation coefficients (overall,
  around atoms, per chain, per residue, and a per-residue histogram).

  Parameters
  ----------
  args : list of str
  log : file, optional

  Nothing is returned.
  """
  print >> log, "-"*79
  print >> log, legend
  print >> log, "-"*79
  inputs = mmtbx.utils.process_command_line_args(args = args,
    master_params = master_params())
  params = inputs.params.extract()
  # estimate resolution
  d_min = params.resolution
  broadcast(m="Map resolution:", log=log)
  if(d_min is None):
    raise Sorry("Resolution is required.")
  print >> log, "  d_min: %6.4f"%d_min
  # model
  broadcast(m="Input PDB:", log=log)
  file_names = inputs.pdb_file_names
  if(len(file_names) != 1): raise Sorry("PDB file has to given.")
  if(inputs.crystal_symmetry is None):
    raise Sorry("No crystal symmetry defined.")
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv      = monomer_library.server.server(),
    ener_lib         = monomer_library.server.ener_lib(),
    file_name        = file_names[0],
    crystal_symmetry = inputs.crystal_symmetry,
    force_symmetry   = True,
    log              = None)
  ph = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  if(len(ph.models())>1):
    raise Sorry("Only one model allowed.")
  xrs = processed_pdb_file.xray_structure()
  xrs.scattering_type_registry(table = params.scattering_table)
  xrs.show_summary(f=log, prefix="  ")
  # restraints
  # Hydrogens are treated as all missing unless H or D scatterers exist.
  sctr_keys = xrs.scattering_type_registry().type_count_dict().keys()
  has_hd = "H" in sctr_keys or "D" in sctr_keys
  geometry = processed_pdb_file.geometry_restraints_manager(
    show_energies      = False,
    assume_hydrogens_all_missing = not has_hd,
    plain_pairs_radius = 5.0)
  # map
  broadcast(m="Input map:", log=log)
  if(inputs.ccp4_map is None): raise Sorry("Map file has to given.")
  # NOTE(review): `log` is not passed to show_summary here, so this output
  # presumably goes to the callee's default stream - confirm.
  inputs.ccp4_map.show_summary(prefix="  ")
  map_data = inputs.ccp4_map.map_data()
  print >> log, "  Actual map (min,max,mean):", \
    map_data.as_1d().min_max_mean().as_tuple()
  make_sub_header("Histogram of map values", out=log)
  md = map_data.as_1d()
  show_histogram(data=md, n_slots=10, data_min=flex.min(md),
    data_max=flex.max(md), log=log)
  # shift origin if needed
  # A shift is needed unless the map is a 3D, 0-based array with a
  # non-empty focus.
  shift_needed = not \
    (map_data.focus_size_1d() > 0 and map_data.nd() == 3 and
     map_data.is_0_based())
  if(shift_needed):
    N = map_data.all()
    O=map_data.origin()
    map_data = map_data.shift_origin()
    # apply same shift to the model
    # NOTE(review): the Cartesian shift is built from the cell edge lengths
    # only, which looks like it assumes orthogonal cell axes - confirm.
    a,b,c = xrs.crystal_symmetry().unit_cell().parameters()[:3]
    sites_cart = xrs.sites_cart()
    sx,sy,sz = a/N[0]*O[0], b/N[1]*O[1], c/N[2]*O[2]
    sites_cart_shifted = sites_cart-\
      flex.vec3_double(sites_cart.size(), [sx,sy,sz])
    # xrs is modified in place; everything below uses the shifted sites.
    xrs.set_sites_cart(sites_cart_shifted)
  ####
  # Compute and show all stats
  ####
  broadcast(m="Model statistics:", log=log)
  make_sub_header("Overall", out=log)
  ms = model_statistics.geometry(
    pdb_hierarchy      = ph,
    restraints_manager = geometry,
    molprobity_scores  = True)
  # NOTE(review): `log` is not passed to show() either - confirm where this
  # output is meant to go.
  ms.show()
  make_sub_header("Histogram of devations from ideal bonds", out=log)
  show_histogram(data=ms.bond_deltas, n_slots=10, data_min=0, data_max=0.2,
    log=log)
  #
  make_sub_header("Histogram of devations from ideal angles", out=log)
  show_histogram(data=ms.angle_deltas, n_slots=10, data_min=0, data_max=30.,
    log=log)
  #
  make_sub_header("Histogram of non-bonded distances", out=log)
  show_histogram(data=ms.nonbonded_distances, n_slots=10, data_min=0,
    data_max=5., log=log)
  #
  make_sub_header("Histogram of ADPs", out=log)
  # Scale the U values by adptbx.u_as_b(1.) before building the histogram.
  bs = xrs.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
  show_histogram(data=bs, n_slots=10, data_min=flex.min(bs),
    data_max=flex.max(bs), log=log)
  #
  # Compute FSC(map, model)
  broadcast(m="Map-model FSC:", log=log)
  mmtbx.maps.correlation.fsc_model_map(
    xray_structure=xrs, map=map_data, d_min=d_min, log=log)
  #
  # various CC
  cc_calculator = mmtbx.maps.correlation.from_map_and_xray_structure_or_fmodel(
    xray_structure = xrs,
    map_data       = map_data,
    d_min          = d_min)
  broadcast(m="Map-model CC:", log=log)
  print >> log, "Overall (entire box):  %6.4f"%cc_calculator.cc()
  # The "around atoms" value uses a selection that includes every scatterer.
  print >> log, "Around atoms (masked): %6.4f"%cc_calculator.cc(
    selection=flex.bool(xrs.scatterers().size(), True))
  # per chain
  print >> log, "Per chain:"
  for chain in ph.chains():
    print >> log, "  chain %s: %6.4f"%(chain.id, cc_calculator.cc(
      selection=chain.atoms().extract_i_seq()))
  # per residue
  print >> log, "Per residue:"
  for rg in ph.residue_groups():
    cc = cc_calculator.cc(selection=rg.atoms().extract_i_seq())
    print >> log, "  chain id: %s resid %s: %6.4f"%(
      rg.parent().id, rg.resid(), cc)
  # per residue detailed counts
  print >> log, "Per residue (histogram):"
  # Build a model map on the same grid as the input map so that the two can
  # be compared point by point around each residue.
  crystal_gridding = maptbx.crystal_gridding(
    unit_cell             = xrs.unit_cell(),
    space_group_info      = xrs.space_group_info(),
    pre_determined_n_real = map_data.accessor().all())
  f_calc = xrs.structure_factors(d_min=d_min).f_calc()
  fft_map = miller.fft_map(
    crystal_gridding     = crystal_gridding,
    fourier_coefficients = f_calc)
  fft_map.apply_sigma_scaling()
  map_model = fft_map.real_map_unpadded()
  sites_cart = xrs.sites_cart()
  cc_per_residue = flex.double()
  for rg in ph.residue_groups():
    cc = mmtbx.maps.correlation.from_map_map_atoms(
      map_1      = map_data,
      map_2      = map_model,
      sites_cart = sites_cart.select(rg.atoms().extract_i_seq()),
      unit_cell  = xrs.unit_cell(),
      radius     = 2.)
    cc_per_residue.append(cc)
  show_histogram(data=cc_per_residue, n_slots=10, data_min=-1., data_max=1.0,
    log=log)
def find_and_build_ions (
      manager,
      fmodels,
      model,
      wavelength,
      params,
      nproc=1,
      elements=Auto,
      out=None,
      run_ordered_solvent=False,
      occupancy_strategy_enabled=False,
      group_anomalous_strategy_enabled=False,
      use_svm=None) :
  """
  Analyzes the water molecules in a structure and re-labels them as ions if
  their scattering and binding environments match what is expected for that
  ion.  Converted atoms optionally get a short occupancy and/or anomalous
  (f', f'') refinement restricted to the newly built ions.

  Parameters
  ----------
  manager : mmtbx.ions.identify.manager or None
      Existing manager to update in place; if None a new one is created with
      mmtbx.ions.identify.create_manager (using mmtbx.ions.svm.manager as the
      manager class when use_svm is true).
  fmodels : mmtbx.fmodels
      Only the X-ray fmodel (fmodels.fmodel_xray()) is used directly.
  model : mmtbx.model.manager
      Modified in place (xray_structure, converted atoms, anomalous groups).
  wavelength : float or None
      When not None, f' and f'' of each new ion are looked up in the Sasaki
      table at this wavelength.
  params : libtbx.phil.scope_extract
  nproc : int, optional
  elements : list of str, optional
      Candidate elements passed to manager.analyze_waters.  A one-element list
      is split on commas.
  out : file, optional
      Log stream; defaults to sys.stdout.
  run_ordered_solvent : bool, optional
      Not referenced in this function.
  occupancy_strategy_enabled : bool, optional
  group_anomalous_strategy_enabled : bool, optional
  use_svm : bool, optional
      Defaults to params.use_svm (False if that attribute is absent).

  Returns
  -------
  manager
      The manager that was passed in, or the newly created one.

  See Also
  --------
  mmtbx.ions.identify.manager.analyze_waters
  """
  import mmtbx.refinement.minimization
  from mmtbx.refinement.anomalous_scatterer_groups import \
    get_single_atom_selection_string
  from mmtbx.refinement import anomalous_scatterer_groups
  import mmtbx.ions.identify
  import mmtbx.ions.svm
  from cctbx.eltbx import sasaki
  from cctbx import crystal
  from cctbx import adptbx
  from cctbx import xray
  from scitbx.array_family import flex
  import scitbx.lbfgs
  if (use_svm is None) :
    use_svm = getattr(params, "use_svm", False)
  assert (1.0 >= params.initial_occupancy >= 0)
  fmodel = fmodels.fmodel_xray()
  anomalous_flag = fmodel.f_obs().anomalous_flag()
  if (out is None) : out = sys.stdout
  # Make the model, the fmodel and the PDB hierarchy agree before analysis.
  model.xray_structure = fmodel.xray_structure
  model.xray_structure.tidy_us()
  pdb_hierarchy = model.pdb_hierarchy(sync_with_xray_structure=True)
  pdb_atoms = pdb_hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  # FIXME why does B for anisotropic waters end up negative?
  # Workaround: overwrite negative water B-factors in the hierarchy with the
  # (non-negative) equivalent isotropic value from the xray structure.
  u_iso = model.xray_structure.extract_u_iso_or_u_equiv()
  for i_seq, atom in enumerate(pdb_atoms) :
    labels = atom.fetch_labels()
    if (labels.resname == "HOH") and (atom.b < 0) :
      assert (u_iso[i_seq] >= 0)
      atom.b = adptbx.u_as_b(u_iso[i_seq])
  if (manager is None) :
    manager_class = None
    if (use_svm) :
      manager_class = mmtbx.ions.svm.manager
      if params.svm.svm_name == "merged_high_res" :
        params.find_anomalous_substructure = False
        params.use_phaser = False
    manager = mmtbx.ions.identify.create_manager(
      pdb_hierarchy=pdb_hierarchy,
      geometry_restraints_manager=model.restraints_manager.geometry,
      fmodel=fmodel,
      wavelength=wavelength,
      params=params,
      nproc=nproc,
      verbose=params.debug,
      log=out,
      manager_class=manager_class)
  else :
    # Re-use the caller's manager: refresh its structure and maps.
    grm = model.restraints_manager.geometry
    connectivity = grm.shell_sym_tables[0].full_simple_connectivity()
    manager.update_structure(
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=fmodel.xray_structure,
      connectivity=connectivity,
      log=out)
    manager.update_maps()
  model.update_anomalous_groups(out=out)
  make_sub_header("Analyzing water molecules", out=out)
  manager.show_current_scattering_statistics(out=out)
  anomalous_groups = []
  # XXX somehow comma-separation of phil strings fields doesn't work
  if (isinstance(elements, list)) and (len(elements) == 1) :
    elements = elements[0].split(",")
  water_ion_candidates = manager.analyze_waters(
    out=out,
    candidates=elements)
  modified_iselection = flex.size_t()
  default_b_iso = manager.get_initial_b_iso()
  # Build in the identified ions.  The SVM manager yields result objects with
  # a single final_choice; the default manager yields tuples, of which only
  # the unambiguous ones (exactly one final choice) are built.
  for_building = []
  if (use_svm) :
    for result in water_ion_candidates :
      for_building.append((result.i_seq, result.final_choice))
  else :
    for i_seq, final_choices, two_fofc in water_ion_candidates :
      if (len(final_choices) == 1) :
        for_building.append((i_seq, final_choices[0]))
  skipped = []
  if (len(for_building) > 0) :
    make_sub_header("Adding %d ions to model" % len(for_building), out)
    for k, (i_seq, final_choice) in enumerate(for_building) :
      atom = manager.pdb_atoms[i_seq]
      skip = False
      # Do not place an ion too close to an already accepted ion of the same
      # charge sign; candidates skipped earlier are not considered.
      for other_i_seq, other_ion in for_building[:k] :
        if (other_i_seq in skipped) : continue
        if (((other_ion.charge > 0) and (final_choice.charge > 0)) or
            ((other_ion.charge < 0) and (final_choice.charge < 0))) :
          other_atom = manager.pdb_atoms[other_i_seq]
          dxyz = atom.distance(other_atom)
          if (dxyz < params.max_distance_between_like_charges) :
            print >> out, \
              "  %s (%s%+d) is only %.3fA from %s (%s%+d), skipping for now" %\
              (atom.id_str(), final_choice.element, final_choice.charge, dxyz,
               other_atom.id_str(), other_ion.element, other_ion.charge)
            skipped.append(i_seq)
            skip = True
            break
      if (skip) : continue
      print >> out, "  %s becomes %s%+d" % \
          (atom.id_str(), final_choice.element, final_choice.charge)
      # "Auto" ADP refinement: anisotropic at d_min <= 1.5, or below 2.5 for
      # elements with atomic number >= 19; otherwise the value stays "Auto".
      refine_adp = params.refine_ion_adp
      if (refine_adp == "Auto") :
        if (fmodel.f_obs().d_min() <= 1.5) :
          refine_adp = "anisotropic"
        elif (fmodel.f_obs().d_min() < 2.5) :
          atomic_number = sasaki.table(final_choice.element).atomic_number()
          if (atomic_number >= 19) :
            refine_adp = "anisotropic"
      # Modify the atom object - this is clumsy but they will be grouped into
      # a single chain at the end of refinement
      initial_b_iso = params.initial_b_iso
      if (initial_b_iso is Auto) :
        initial_b_iso = manager.guess_b_iso_real(i_seq)
      element = final_choice.element
      if (element == "IOD") : # FIXME
        element = "I"
      modified_atom = model.convert_atom(
        i_seq=i_seq,
        scattering_type=final_choice.scattering_type(),
        atom_name=element,
        element=element,
        charge=final_choice.charge,
        residue_name=final_choice.element,
        initial_occupancy=params.initial_occupancy,
        initial_b_iso=initial_b_iso,
        chain_id=params.ion_chain_id,
        segid="ION",
        refine_adp=refine_adp,
        refine_occupancies=False) #params.refine_ion_occupancies)
      if (params.refine_anomalous) and (anomalous_flag) :
        scatterer = model.xray_structure.scatterers()[i_seq]
        if (wavelength is not None) :
          fp_fdp_info = sasaki.table(final_choice.element).at_angstrom(
            wavelength)
          scatterer.fp = fp_fdp_info.fp()
          scatterer.fdp = fp_fdp_info.fdp()
          print >> out, "    setting f'=%g, f''=%g" % (scatterer.fp,
            scatterer.fdp)
        # One single-atom anomalous group per new ion.
        group = xray.anomalous_scatterer_group(
          iselection=flex.size_t([i_seq]),
          f_prime=scatterer.fp,
          f_double_prime=scatterer.fdp,
          refine=["f_prime","f_double_prime"],
          selection_string=get_single_atom_selection_string(modified_atom),
          update_from_selection=True)
        anomalous_groups.append(group)
      modified_iselection.append(i_seq)
  if (len(modified_iselection) > 0) :
    scatterers = model.xray_structure.scatterers()
    # FIXME not sure this is actually working as desired...
    site_symmetry_table = model.xray_structure.site_symmetry_table()
    for i_seq in site_symmetry_table.special_position_indices() :
      scatterers[i_seq].site = crystal.correct_special_position(
        crystal_symmetry=model.xray_structure,
        special_op=site_symmetry_table.get(i_seq).special_op(),
        site_frac=scatterers[i_seq].site,
        site_label=scatterers[i_seq].label,
        tolerance=1.0)
    model.xray_structure.replace_scatterers(scatterers=scatterers)
    def show_r_factors () :
      return "r_work=%6.4f r_free=%6.4f" % (fmodel.r_work(), fmodel.r_free())
    fmodel.update_xray_structure(
      xray_structure=model.xray_structure,
      update_f_calc=True,
      update_f_mask=True)
    n_anom = len(anomalous_groups)
    refine_anomalous = anomalous_flag and params.refine_anomalous and n_anom>0
    # Occupancies are refined here only if requested (or needed for anomalous
    # refinement) and the regular occupancy strategy will not handle them.
    refine_occupancies = ((params.refine_ion_occupancies or refine_anomalous)
      and ((not occupancy_strategy_enabled) or
           (model.refinement_flags.s_occupancies is None) or
           (len(model.refinement_flags.s_occupancies) == 0)))
    if (refine_anomalous) :
      # Hand the new groups to the main group_anomalous strategy if it is
      # active, instead of refining them here.
      if ((model.anomalous_scatterer_groups is not None) and
          (group_anomalous_strategy_enabled)) :
        model.anomalous_scatterer_groups.extend(anomalous_groups)
        refine_anomalous = False
    if (refine_occupancies) or (refine_anomalous) :
      print >> out, ""
      print >> out, "  occupancy refinement (new ions only): start %s" % \
        show_r_factors()
      # Switch off all gradients, then enable occupancy gradients for the
      # newly built ions only.
      fmodel.xray_structure.scatterers().flags_set_grads(state = False)
      fmodel.xray_structure.scatterers().flags_set_grad_occupancy(
        iselection = modified_iselection)
      lbfgs_termination_params = scitbx.lbfgs.termination_parameters(
        max_iterations = 25)
      minimized = mmtbx.refinement.minimization.lbfgs(
        restraints_manager       = None,
        fmodels                  = fmodels,
        model                    = model,
        is_neutron_scat_table    = False,
        lbfgs_termination_params = lbfgs_termination_params)
      fmodel.xray_structure.adjust_occupancy(
        occ_max   = 1.0,
        occ_min   = 0,
        selection = modified_iselection)
      zero_occ = []
      for i_seq in modified_iselection :
        occ = fmodel.xray_structure.scatterers()[i_seq].occupancy
        if (occ == 0) :
          zero_occ.append(i_seq)
      fmodel.update_xray_structure(
        update_f_calc=True,
        update_f_mask=True)
      print >> out, "                                        final %s" % \
        show_r_factors()
      if (len(zero_occ) > 0) :
        # The atom count was previously never substituted into the message.
        print >> out, "  WARNING: occupancy dropped to zero for %d atoms:" % \
          len(zero_occ)
        atoms = model.pdb_hierarchy().atoms()
        for i_seq in zero_occ :
          print >> out, "    %s" % atoms[i_seq].id_str(suppress_segid=True)
      print >> out, ""
    if (refine_anomalous) :
      assert fmodel.f_obs().anomalous_flag()
      print >> out, "  anomalous refinement (new ions only): start %s" % \
        show_r_factors()
      # The anomalous minimizer is run with the "ls" target; the "ml" target
      # is restored afterwards.
      fmodel.update(target_name="ls")
      anomalous_scatterer_groups.minimizer(
        fmodel=fmodel,
        groups=anomalous_groups)
      fmodel.update(target_name="ml")
      print >> out, "                                        final %s" % \
        show_r_factors()
      print >> out, ""
  return manager
 def run_resolve (self) :
   """
   Rebuild the selected fragment with RESOLVE, copy the rebuilt coordinates
   back into the box, minimize any selected atoms RESOLVE did not return,
   then anneal or real-space refine the box and report before/after
   statistics.  Writes resolve.pdb, box_resolve.pdb, box_refined.pdb and a
   CCP4 map as side effects.

   Raises KeyError if RESOLVE returns an atom whose id_str is not in
   self.atom_id_mapping.
   """
   from solve_resolve.resolve_python import resolve_in_memory
   from iotbx import pdb
   from scitbx.array_family import flex
   make_sub_header("RESOLVE build", out=self.out)
   # reference values for the before/after summary printed at the end
   density_before = self.mean_density_at_sites()
   cc_before = self.cc_model_map()
   sites_before = self.get_selected_sites(hydrogens=False)
   time_begin = time.time()
   # work on a hierarchy reconstructed from the PDB-input form of the selection
   inp_hierarchy = \
     self.box_selected_hierarchy.as_pdb_input().construct_hierarchy()
   chain = inp_hierarchy.only_model().only_chain()
   first_resseq = chain.residue_groups()[0].resseq_as_int()
   seq = "".join(chain.only_conformer().as_sequence(substitute_unknown='A'))
   # keywords common to both build modes
   common_args = [
     "start_chain 1 %d" % first_resseq,
     "extend_only",
     "skip_hetatm",
     "no_merge_ncs_copies",
     "no_optimize_ncs",
     "i_ran_seed %d" % int(time.time() % os.getpid()),
   ]
   if (self.params.build_new_loop) : # XXX not really working...
     n_res = len(chain.residue_groups())
     assert (n_res >= 3)
     # keep only the two terminal residue groups as anchors
     for interior_rg in chain.residue_groups()[1:-1] :
       print >> self.out, "  removing residue group %s %s" % \
         (chain.id, interior_rg.resid())
       chain.remove_residue_group(interior_rg)
     mode_args = [
       "loop_only",
       "build_outside_model",
       "no_sub_segments",
       "n_random_loop %d" % self.params.n_random_loop,
       "loop_length %d" % (n_res - 2),
       "rms_random_loop 0.3",
       "rho_min_main_low 0.5",
       "rho_min_main_base 0.5",
       "n_internal_start 0",
     ]
   else :
     mode_args = [
       "rebuild_in_place",
       "replace_existing",
       "richardson_rotamers",
       "min_z_value_rho -3.0",
       "delta_phi   20.00",
       "dist_cut_base 3.0",
       "n_random_frag 0",
       "group_ca_length 4",
       "group_length 2",
     ]
   resolve_args = common_args + mode_args
   # RESOLVE output is discarded unless debugging
   if (self.debug) :
     resolve_log = self.out
   else :
     resolve_log = null_out()
   cmn = resolve_in_memory.run(
     map_coeffs=self.box_map_coeffs,
     pdb_inp=inp_hierarchy.as_pdb_input(),
     build=True,
     input_text="\n".join(resolve_args),
     chain_type="PROTEIN",
     seq_file_as_string=seq,
     out=resolve_log)
   rebuilt_lines = flex.split_lines(cmn.atom_db.pdb_out_as_string)
   new_pdb_input = pdb.input(source_info='string', lines=rebuilt_lines)
   new_hierarchy = new_pdb_input.construct_hierarchy()
   print >> self.out, "  %d atoms rebuilt" % len(new_hierarchy.atoms())
   new_hierarchy.write_pdb_file("resolve.pdb")
   # map every rebuilt atom back to its index within the selection
   moved_i_seqs = flex.size_t()
   moved_sites = flex.vec3_double()
   for rebuilt_atom in new_hierarchy.atoms() :
     atom_key = rebuilt_atom.id_str()
     if (atom_key not in self.atom_id_mapping) :
       raise KeyError("Atom ID %s not recognized in RESOLVE model." % atom_key)
     moved_i_seqs.append(self.atom_id_mapping[atom_key])
     moved_sites.append(rebuilt_atom.xyz)
   # push the new coordinates into the selection, then into the whole box
   selected_sites = self.box_selected_hierarchy.atoms().extract_xyz()
   selected_sites.set_selected(moved_i_seqs, moved_sites)
   self.box_selected_hierarchy.atoms().set_xyz(selected_sites)
   box_sites = self.box.xray_structure_box.sites_cart()
   box_sites.set_selected(self.selection_in_box, selected_sites)
   self.box.xray_structure_box.set_sites_cart(box_sites)
   self.box.pdb_hierarchy_box.atoms().set_xyz(box_sites)
   time_resolve = time.time()
   print >> self.out, "  RESOLVE time: %.1fs" % (time_resolve-time_begin)
   selection_rebuilt = self.selection_in_box.select(moved_i_seqs)
   minimize_sel = flex.bool(self.n_sites_box, False)
   minimize_sel = minimize_sel.set_selected(self.selection_in_box, True)
   minimize_sel = minimize_sel.set_selected(selection_rebuilt, False)
   # atoms present in the selection but not in the RESOLVE model (usually
   # hydrogen atoms) need to be minimized to follow the rebuilt sites
   if (minimize_sel.count(True) > 0) :
     print >> self.out, "  Performing geometry minimzation on unbuilt sites"
     self.geometry_minimization(selection=minimize_sel, nonbonded=False)
   self.box.write_pdb_file("box_resolve.pdb")
   # two alternatives here: restrain other atoms tightly, and minimize the
   # entire box, or restrain selected atoms loosely, and refine only those
   self.restrain_atoms(selection=self.others_in_box, reference_sigma=0.02)
   if (self.params.anneal) :
     self.anneal(start_temperature=2500)
   else :
     self.real_space_refine(selection=self.selection_all_box)
   self.box.write_pdb_file("box_refined.pdb")
   self.box.write_ccp4_map()
   # before/after summary
   density_after = self.mean_density_at_sites()
   cc_after = self.cc_model_map()
   print >> self.out, "  mean density level: start=%.2fsigma  end=%.2fsigma" \
     % (density_before, density_after)
   print >> self.out, "  model-map CC: start=%.3f  end=%.3f" % (cc_before,
     cc_after)
   sites_after = self.get_selected_sites(hydrogens=False)
   print >> self.out, "  rmsd to starting model: %.3f Angstrom" % \
     sites_after.rms_difference(sites_before)
   time_end = time.time()
   print >> self.out, "  Total build and refine time: %.1fs" % \
     (time_end-time_begin)
Exemple #51
0
def assemble_constraint_groups_3d (
    xray_structure,
    pdb_atoms,
    constraint_groups,
    interaction_distance_cutoff=4.0,
    verbose=False,
    log=None) :
  """
  Re-sorts occupancy constraint groups so that conformers whose motion is
  correlated (i.e. they interact in 3D, without necessarily being part of
  the same fragment/molecule/ASU) are grouped together.  As input, it expects
  the constraint groups output by mmtbx.utils.occupancy_selections(), which
  will already have connectivity taken into account.  This function will exit
  with an error if the occupancies for the new groups are not consistent.

  Parameters:
    xray_structure: used to build the pair table of atoms within
      interaction_distance_cutoff of each other.
    pdb_atoms: atom array indexed by i_seq; supplies occupancies and altlocs.
    constraint_groups: list of groups, each group being a list of selections
      (sequences of i_seq), one per conformer.  MODIFIED IN PLACE: groups are
      deleted from the list and selections are extended.
    interaction_distance_cutoff: distance cutoff passed to pair_asu_table().
    verbose: if True, print each interacting atom pair with a bare print
      statement (i.e. not to log).
    log: output stream for the summary; defaults to null_out().

  Returns the same constraint_groups list object, after merging.

  Raises Sorry if occupancies are not uniform within a selection or between
  selections being merged, and RuntimeError if a group that needs merging
  contains a selection whose first atom has a blank altloc.
  """
  if (log is None) :
    log = null_out()
  make_sub_header("Correlated occupancy grouping", out=log)
  # NOTE(review): the contact address in the banner below reads
  # "[email protected]", which looks like e-mail obfuscation damage -
  # confirm and restore the real address.
  print >> log, """
  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  !!                  WARNING - EXPERIMENTAL FEATURE                        !!
  !!                                                                        !!
  !! Grouping of occupancy constraints in 3D is experimental and not fully  !!
  !! tested.  Use at your own risk!  For bug reports, etc. contact us by    !!
  !! email at [email protected].                                       !!
  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
"""
  occupancies = pdb_atoms.extract_occ()
  pair_asu_table = xray_structure.pair_asu_table(
    distance_cutoff=interaction_distance_cutoff)
  pair_sym_table = pair_asu_table.extract_pair_sym_table()
  k = 0
  n_groups_start = len(constraint_groups)
  # Outer loop uses an explicit index because constraint_groups shrinks while
  # it is being traversed (later groups are deleted as they are merged).
  while (k < len(constraint_groups)) :
    groups = constraint_groups[k]
    print >> log, "Constraint group %d: %d conformers" % (k+1, len(groups))
    # later groups that interact with group k; filled in below
    merge_constraints = []
    for i_sel, selection in enumerate(groups) :
      occ = occupancies.select(selection)
      altloc = pdb_atoms[selection[0]].fetch_labels().altloc
      print >> log, "  conformer '%s': %d atoms" % (altloc, len(selection))
      if (not occ.all_eq(occ[0])) :
        raise Sorry("At least one occupancy constraint group has "+
          "inconsistent occupancies for atoms in a single conformer.  To use "+
          "the automatic 3D constraints, the starting occupancies must be "+
          "uniform within each selection.")
      for i_seq in selection :
        labels = pdb_atoms[i_seq].fetch_labels()
        # atoms without an altloc are not used to detect interactions
        if (labels.altloc.strip() == '') :
          continue
        pair_sym_dict = pair_sym_table[i_seq]
        if (verbose) :
          print "%s (group %d):" % (pdb_atoms[i_seq].id_str(), k+1)
        for j_seq, sym_ops in pair_sym_dict.items() :
          # Scan only groups after k; when a group containing j_seq is found
          # it is moved to merge_constraints and removed from the list, so kk
          # is advanced only when nothing was removed at that position.
          kk = k + 1
          while (kk < len(constraint_groups)) :
            combine_group = False
            for other_selection in constraint_groups[kk] :
              if (j_seq in other_selection) :
                if (verbose) :
                  print "  %s (group %d)" % (pdb_atoms[j_seq].id_str(), kk+1)
                merge_constraints.append(constraint_groups[kk])
                del constraint_groups[kk]
                combine_group = True
                break
            if (not combine_group) :
              kk += 1
    if (len(merge_constraints) > 0) :
      print >> log, "Merging %d constraint groups with group %d" % (
        len(merge_constraints), (k+1))
      # For each conformer of group k, absorb the selections with the same
      # altloc from every group being merged.
      for selection in groups :
        first_atom = pdb_atoms[selection[0]]
        altloc = first_atom.fetch_labels().altloc
        if (altloc.strip() == '') :
          raise RuntimeError(("Atom '%s' in occupancy constraint group has "+
            "blank altloc ID") % first_atom.id_str())
        for merge_groups in merge_constraints :
          # index loop: matched selections are deleted from merge_groups
          kk = 0
          while (kk < len(merge_groups)) :
            other_selection = merge_groups[kk]
            altloc2 = pdb_atoms[other_selection[0]].fetch_labels().altloc
            if (altloc2 == altloc) :
              print >> log, "  combining %d atoms with altloc %s" % \
                (len(other_selection), altloc)
              occ1 = occupancies.select(selection)
              occ2 = occupancies.select(other_selection)
              # both selections must share one common occupancy value
              if (not occ1.all_eq(occ2[0])) or (not occ2.all_eq(occ1[0])) :
                raise Sorry(
                  ("Inconsistent occupancies in spatially related groups "+
                  "(%.2f versus %.2f).  To use automatic 3D occupancy "+
                  "restraints, the correlated conformers must start with "+
                  "the same initial occupancy.") % (occ1[0], occ2[0]))
              selection.extend(other_selection)
              del merge_groups[kk]
            else :
              kk += 1
      # Whatever is left had no conformer with a matching altloc in group k;
      # append it to group k as an additional conformer.
      for merge_groups in merge_constraints :
        if (len(merge_groups) > 0) :
          for other_selection in merge_groups :
            altloc = pdb_atoms[other_selection[0]].fetch_labels().altloc
            print >> log, ("  warning: %d atoms with altloc %s do not "+
              "correspond to an existing group") % (len(other_selection),
              altloc)
            groups.append(other_selection)
    k += 1
  # Report the final grouping only if something was merged.
  if (len(constraint_groups) != n_groups_start) :
    print >> log, "New occupancy constraint groups:"
    for i_group, constraint_group in enumerate(constraint_groups) :
      print >> log, "  group %d:" % (i_group+1)
      for selection in constraint_group :
        # atom_group ids in order of first appearance, without duplicates
        resids = []
        altlocs = set()
        for i_seq in selection :
          atom_group = pdb_atoms[i_seq].parent()
          ag_id = atom_group.id_str()
          altlocs.add(atom_group.altloc)
          if (not ag_id in resids) :
            resids.append(ag_id)
        # every merged selection must contain a single altloc
        assert len(altlocs) == 1
        print >> log, "    conformer '%s' (%d atoms):" % (list(altlocs)[0],
          len(selection))
        for ag_id in resids :
          print >> log, "      atom_group %s" % ag_id
  else :
    print >> log, "Occupancy constraint groups unmodified."
  print >> log, ""
  return constraint_groups
Exemple #52
0
def filter_before_build (
    pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    selection=None,
    params=None,
    verbose=True,
    log=sys.stdout) :
  """
  Pick residues suitable for building alternate conformations - by default,
  this means no MolProbity/geometry outliers, good fit to map, no missing
  atoms, and no pre-existing alternates, but with significant difference
  density nearby.

  Parameters:
    pdb_hierarchy: model hierarchy; its atoms must match the scatterers of
      fmodel.xray_structure one-to-one (asserted).  i_seq is reset.
    fmodel: used for validation and for the 2mFo-DFc / mFo-DFc maps.
    geometry_restraints_manager: passed through to the MolProbity validation.
    selection: boolean selection over all atoms; a residue is considered only
      if every one of its atoms is selected.  Defaults to all atoms.
    params: extracted filter parameters; defaults to the values parsed from
      filter_params_str.
    verbose: if True, show the full validation and report residues rejected
      for lack of difference density.
    log: output stream.

  Returns a list of atom_group objects, one per accepted residue.

  Raises Sorry if no residue passes the filters.
  """
  from mmtbx.validation import molprobity
  from mmtbx.rotamer import rotamer_eval
  import mmtbx.monomer_library.server
  from mmtbx import building
  from iotbx.pdb import common_residue_names_get_class
  from scitbx.array_family import flex
  if (selection is None) :
    selection = flex.bool(fmodel.xray_structure.scatterers().size(), True)
  pdb_atoms = pdb_hierarchy.atoms()
  assert (pdb_atoms.size() == fmodel.xray_structure.scatterers().size())
  pdb_atoms.reset_i_seq()
  full_validation = molprobity.molprobity(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    outliers_only=False,
    rotamer_library="8000")
  if (verbose) :
    full_validation.show(out=log)
  multi_criterion = full_validation.as_multi_criterion_view()
  if (params is None) :
    params = libtbx.phil.parse(filter_params_str).extract()
  mon_lib_srv = mmtbx.monomer_library.server.server()
  two_fofc_map, fofc_map = building.get_difference_maps(fmodel=fmodel)
  residues = []
  make_sub_header("Identifying candidates for building", out=log)
  # TODO parallelize
  for chain in pdb_hierarchy.only_model().chains() :
    if (not chain.is_protein()) :
      continue
    for residue_group in chain.residue_groups() :
      atom_groups = residue_group.atom_groups()
      id_str = residue_group.id_str()
      i_seqs = residue_group.atoms().extract_i_seq()
      # silently ignore residues that are not completely inside the selection
      residue_sel = selection.select(i_seqs)
      if (not residue_sel.all_eq(True)) :
        continue
      if (len(atom_groups) > 1) :
        print >> log, "  %s is already multi-conformer" % id_str
        continue
      atom_group = atom_groups[0]
      res_class = common_residue_names_get_class(atom_group.resname)
      if (res_class != "common_amino_acid") :
        print >> log, "  %s: non-standard residue" % id_str
        continue
      missing_atoms = rotamer_eval.eval_residue_completeness(
        residue=atom_group,
        mon_lib_srv=mon_lib_srv,
        ignore_hydrogens=True)
      if (len(missing_atoms) > 0) :
        # residues modeled as pseudo-ALA are allowed by default; partially
        # missing sidechains are more problematic
        if ((building.is_stub_residue(atom_group)) and
            (not params.ignore_stub_residues)) :
          pass
        else :
          # The format string used to have a single placeholder for two
          # values, which raised TypeError instead of logging the rejection.
          print >> log, \
            "  %s: missing or incomplete sidechain (%d atoms missing)" % \
            (id_str, len(missing_atoms))
          continue
      validation = multi_criterion.get_residue_group_data(residue_group)
      is_outlier = is_validation_outlier(validation, params)
      if (is_outlier) :
        print >> log, "  %s" % str(validation)
        continue
      if (params.use_difference_map) :
        # require some difference-map signal around the non-hydrogen atoms
        i_seqs_no_hd = building.get_non_hydrogen_atom_indices(residue_group)
        map_stats = building.local_density_quality(
          fofc_map=fofc_map,
          two_fofc_map=two_fofc_map,
          atom_selection=i_seqs_no_hd,
          xray_structure=fmodel.xray_structure,
          radius=params.sampling_radius)
        if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
            (map_stats.fraction_of_nearby_grid_points_above_cutoff()==0)) :
          if (verbose) :
            print >> log, "  no difference density for %s" % id_str
          continue
      residues.append(residue_group.only_atom_group())
  if (len(residues) == 0) :
    raise Sorry("No residues passed the filtering criteria.")
  print >> log, ""
  print >> log, "Alternate conformations will be tried for %d residue(s):" % \
      len(residues)
  building.show_chain_resseq_ranges(residues, out=log, prefix="  ")
  print >> log, ""
  return residues
Exemple #53
0
def run (args=None, params=None, out=sys.stdout) :
  """
  Compute CC* and related merging/model statistics (Karplus & Diederichs
  2012) from merged data, unmerged intensities and either a pre-calculated
  F(model) array or a PDB model.

  Exactly one of args and params must be given (asserted):
    args: command-line arguments, processed against master_phil; an empty
      list or "--help" raises Usage.
    params: already-extracted parameters.
    out: output stream.

  Returns the mmtbx.validation.experimental.merging_and_model_statistics
  object, after printing its CC* table (and loggraph if requested) to out.

  Raises Sorry for missing input files, ambiguous or missing F(model) arrays,
  or data of the wrong type.
  """
  assert [args, params].count(None) == 1
  if args is not None:
    if (len(args) == 0) or ("--help" in args) :
      raise Usage("""
  phenix.cc_star model.pdb data.mtz unmerged_data=data.hkl [n_bins=X] [options]
  phenix.cc_star model_refine_001.mtz unmerged_data=data.hkl [...]

Implementation of the method for assessing data and model quality described in:
  Karplus PA & Diederichs K (2012) Science 336:1030-3.

Full parameters:
  %s
  """ % master_phil.as_str(prefix=" ", attributes_level=1))
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
      args=args,
      master_phil=master_phil,
      pdb_file_def="model",
      reflection_file_def="data")
    params = cmdline.work.extract()
  import mmtbx.command_line
  import mmtbx.validation.experimental
  from iotbx import merging_statistics
  from iotbx import file_reader
  if (params.data is None) :
    raise Sorry("Please specify a data file (usually MTZ format).")
  if (params.unmerged_data is None) :
    raise Sorry("Please specify unmerged_data file")
  hkl_in = file_reader.any_file(params.data, force_type="hkl")
  hkl_in.check_file_type("hkl")
  f_model = f_obs = r_free_flags = None
  f_models = []
  f_model_labels = []
  if (params.f_model_labels is None) :
    # No labels given: look for complex arrays that are named like F(model).
    for array in hkl_in.file_server.miller_arrays :
      labels = array.info().label_string()
      if (array.is_complex_array()) :
        if (labels.startswith("F-model") or labels.startswith("FMODEL")) :
          f_models.append(array)
          f_model_labels.append(labels)
    if (len(f_models) > 1) :
      # The parameter read by this function is f_model_labels.
      raise Sorry(("Multiple F(model) arrays found:\n%s\nPlease specify the "+
        "'f_model_labels' parameter.") % "\n".join(f_model_labels))
    elif (len(f_models) == 1) :
      f_model = f_models[0]
      if (f_model.anomalous_flag()) :
        info = f_model.info()
        f_model = f_model.average_bijvoet_mates().set_info(info)
      print >> out, "F(model):"
      f_model.show_summary(f=out, prefix="  ")
    else :
      # No F(model) in the file: read F(obs) now, F(model) is calculated from
      # the PDB file further down.
      data_array = hkl_in.file_server.get_xray_data(
        file_name=params.data,
        labels=params.f_obs_labels,
        ignore_all_zeros=True,
        parameter_scope="")
      if (data_array.is_xray_intensity_array()) :
        from cctbx import french_wilson
        f_obs = french_wilson.french_wilson_scale(
          miller_array=data_array,
          out=out)
      else :
        f_obs = data_array
  else :
    for array in hkl_in.file_server.miller_arrays :
      array_labels = array.info().label_string()
      if (array_labels == params.f_model_labels) :
        if (array.is_complex_array()) :
          f_model = array
          break
        else :
          raise Sorry("The data in %s are not of the required type." %
            array_labels)
    if (f_model is None) :
      # Without this check f_obs stays None and the amplitude assertion
      # below fails with an AttributeError.
      raise Sorry("No array with the labels '%s' found in %s." %
        (params.f_model_labels, params.data))
  if (f_model is not None) :
    assert (f_obs is None)
    # Prefer an exact label match; otherwise let the file server choose.
    for array in hkl_in.file_server.miller_arrays :
      labels = array.info().label_string()
      if (labels == params.f_obs_labels) :
        f_obs = array
        break
    else :
      try :
        # file_name must be the reflection file (as in the other file_server
        # calls here); the labels are the selection criterion.
        f_obs = hkl_in.file_server.get_amplitudes(
          file_name=params.data,
          labels=params.f_obs_labels,
          convert_to_amplitudes_if_necessary=False,
          parameter_name="f_obs_labels",
          parameter_scope="",
          strict=True)
      except Sorry :
        raise Sorry("You must supply a file containing both F-obs and F-model "+
          "if you want to use a pre-calculated F-model array.")
  assert (f_obs.is_xray_amplitude_array())
  if (f_obs.anomalous_flag()) :
    info = f_obs.info()
    f_obs = f_obs.average_bijvoet_mates().set_info(info)
  print >> out, "F(obs):"
  f_obs.show_summary(f=out, prefix="  ")
  print >> out, ""
  r_free_flags, test_flag_value = hkl_in.file_server.get_r_free_flags(
    file_name=params.data,
    label=params.r_free_flags.label,
    test_flag_value=params.r_free_flags.test_flag_value,
    disable_suitability_test=False,
    parameter_scope="")
  # convert the flags to a boolean array (True = test set)
  info = r_free_flags.info()
  r_free_flags = r_free_flags.customized_copy(
    data=r_free_flags.data()==test_flag_value).set_info(info)
  if (r_free_flags.anomalous_flag()) :
    r_free_flags = r_free_flags.average_bijvoet_mates().set_info(info)
  print >> out, "R-free flags:"
  r_free_flags.show_summary(f=out, prefix="  ")
  print >> out, ""
  unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
    f_obs=f_obs,
    file_name=params.unmerged_data,
    data_labels=params.unmerged_labels,
    log=out)
  print >> out, "Unmerged intensities:"
  unmerged_i_obs.show_summary(f=out, prefix="  ")
  print >> out, ""
  if (f_model is None) :
    assert (f_obs is not None)
    if (params.model is None) :
      raise Sorry("A PDB file is required if F(model) is not pre-calculated.")
    make_sub_header("Calculating F(model)", out=out)
    pdb_in = file_reader.any_file(params.model, force_type="pdb")
    pdb_in.check_file_type("pdb")
    # Reconcile symmetry: take it from whichever input has it, and report a
    # symmetry error if both have it and they disagree.
    pdb_symm = pdb_in.file_object.crystal_symmetry()
    if (pdb_symm is None) :
      pdb_symm = f_obs
    else :
      if (f_obs.crystal_symmetry() is None) :
        f_obs = f_obs.customized_copy(crystal_symmetry=pdb_symm)
      elif (not pdb_symm.is_similar_symmetry(f_obs)) :
        mmtbx.command_line.show_symmetry_error(
          file1="PDB file",
          file2="data file",
          symm1=pdb_symm,
          symm2=f_obs)
    xray_structure = pdb_in.file_object.xray_structure_simple(
      crystal_symmetry=pdb_symm)
    from mmtbx.utils import fmodel_simple
    # XXX this gets done anyway later, but they need to be consistent before
    # creating the fmodel manager
    if (f_obs.anomalous_flag()) :
      f_obs = f_obs.average_bijvoet_mates()
    f_obs = f_obs.eliminate_sys_absent()
    f_obs, r_free_flags = f_obs.map_to_asu().common_sets(
      other=r_free_flags.map_to_asu())
    fmodel = fmodel_simple(
      f_obs=f_obs,
      r_free_flags=r_free_flags,
      xray_structures=[xray_structure],
      skip_twin_detection=True,
      scattering_table="n_gaussian")
    fmodel.show(log=out)
    f_model = fmodel.f_model()
    r_free_flags = f_model.customized_copy(data=fmodel.arrays.free_sel)
  else :
    if (f_model.anomalous_flag()) :
      f_model = f_model.average_bijvoet_mates()
    f_model, r_free_flags = f_model.common_sets(other=r_free_flags)
  stats = mmtbx.validation.experimental.merging_and_model_statistics(
    f_model=f_model,
    f_obs=f_obs,
    r_free_flags=r_free_flags,
    unmerged_i_obs=unmerged_i_obs,
    n_bins=params.n_bins,
    sigma_filtering=params.sigma_filtering)
  stats.show_cc_star(out=out)
  if (params.loggraph) :
    stats.show_loggraph(out=out)
  print >> out, ""
  print >> out, "Reference:"
  print >> out, "  Karplus PA & Diederichs K (2012) Science 336:1030-3."
  print >> out, ""
  return stats
  def show (self, out=sys.stdout, outliers_only=True, suppress_summary=False,
      show_percentiles=False) :
    """
    Write the complete validation report to `out` and return self.

    Every section is printed only if the attribute that backs it is not None.
    The real-space and water sections are nested under the experimental data
    section, so they are skipped whenever self.data_stats is None.

    :param out: stream receiving all output
    :param outliers_only: forwarded to the ramalyze, omegalyze, rotalyze,
      cbetadev and rna show() calls
    :param suppress_summary: when true, the final "Summary" section is omitted
    :param show_percentiles: forwarded to self.show_summary()
    """
    indent = "  "
    if (self.xtriage is not None) :
      issues = self.xtriage.summarize_issues()
      issues.show(out=out)
    if (self.data_stats is not None) :
      make_header("Experimental data", out=out)
      self.data_stats.show(out=out, prefix=indent)
      # Attributes are looked up one at a time, in order, right before use.
      for attr_name, title in (
          ("real_space", "Residues with poor real-space CC"),
          ("waters", "Suspicious water molecules")) :
        section = getattr(self, attr_name)
        if (section is None) :
          continue
        make_sub_header(title, out=out)
        section.show(out=out, prefix=indent)
    if (self.model_stats is not None) :
      make_header("Model properties", out=out)
      self.model_stats.show(out=out, prefix=indent)
    if (self.restraints is not None) :
      make_header("Geometry restraints", out=out)
      self.restraints.show(out=out, prefix=indent)
    make_header("Molprobity validation", out=out)
    # (attribute, sub-header, prefix, whether show() receives outliers_only)
    # NOTE: omegalyze uses a one-space prefix, unlike every other section.
    molprobity_sections = (
      ("ramalyze", "Ramachandran angles", "  ", True),
      ("omegalyze", "Omegalyze analysis", " ", True),
      ("rotalyze", "Sidechain rotamers", "  ", True),
      ("cbetadev", "C-beta deviations", "  ", True),
      ("clashes", "Bad clashes", "  ", False),
      ("nqh_flips", "Asn/Gln/His flips", "  ", False),
    )
    for attr_name, title, section_prefix, pass_outliers_only in \
        molprobity_sections :
      section = getattr(self, attr_name)
      if (section is None) :
        continue
      make_sub_header(title, out=out)
      if pass_outliers_only :
        section.show(out=out, prefix=section_prefix,
          outliers_only=outliers_only)
      else :
        section.show(out=out, prefix=section_prefix)
    if (self.rna is not None) :
      make_header("RNA validation", out=out)
      self.rna.show(out=out, prefix=indent, outliers_only=outliers_only)
    if suppress_summary :
      return self
    make_header("Summary", out=out)
    self.show_summary(out=out, prefix=indent,
      show_percentiles=show_percentiles)
    return self
Exemple #55
0
 def __init__ (self,
     args,
     master_phil,
     out=sys.stdout,
     process_pdb_file=True,
     require_data=True,
     create_fmodel=True,
     prefer_anomalous=None,
     force_non_anomalous=False,
     set_wavelength_from_model_header=False,
     set_inelastic_form_factors=None,
     usage_string=None,
     create_log_buffer=False,
     remove_unknown_scatterers=False,
     generate_input_phil=False) :
   """
   Process command-line arguments and load the model, reflection data and
   related inputs, storing the results as attributes of this object.

   :param args: list of command-line arguments; also scanned for "--help",
     "--quiet" and "quiet=True"
   :param master_phil: master PHIL object, or a string which is parsed here
   :param out: output stream; replaced by null_out() in quiet mode
   :param process_pdb_file: if true, the model is run through
     mmtbx.utils.process_pdb_file_srv and geometry restraints are built;
     otherwise it is read with mmtbx.utils.pdb_file only
   :param require_data: raise Sorry if no reflections file was given
   :param create_fmodel: build self.fmodel when reflection data are present
   :param prefer_anomalous: forwarded to determine_data_and_flags
   :param force_non_anomalous: forwarded to determine_data_and_flags; must
     not be combined with a true prefer_anomalous
   :param set_wavelength_from_model_header: when no wavelength is specified,
     take it from pdb_inp.extract_wavelength() if available
   :param set_inelastic_form_factors: None, "sasaki" or "henke"
   :param usage_string: if not None, Usage is raised when args is empty or
     contains "--help"
   :param create_log_buffer: log to a multi_out combining `out` and an
     in-memory StringIO buffer
   :param remove_unknown_scatterers: drop atoms of element X; only consulted
     when process_pdb_file is false
   :param generate_input_phil: master_phil must be a string and is expanded
     with generate_master_phil_with_inputs()
   :raises Sorry: on missing or inconsistent inputs
   :raises Usage: see usage_string
   """
   import mmtbx.monomer_library.pdb_interpretation
   import mmtbx.monomer_library.server
   import mmtbx.utils
   from iotbx import crystal_symmetry_from_any
   from iotbx import file_reader
   import iotbx.phil
   if generate_input_phil :
     assert isinstance(master_phil, basestring)
     master_phil = generate_master_phil_with_inputs(phil_string=master_phil)
   if isinstance(master_phil, str) :
     master_phil = iotbx.phil.parse(master_phil)
   if (usage_string is not None) :
     if (len(args) == 0) or ("--help" in args) :
       raise Usage("""%s\n\nFull parameters:\n%s""" % (usage_string,
         master_phil.as_str(prefix="  ")))
   if (force_non_anomalous) :
     assert (not prefer_anomalous)
   assert (set_inelastic_form_factors in [None, "sasaki", "henke"])
   self.args = args
   self.master_phil = master_phil
   self.processed_pdb_file = self.pdb_inp = None
   self.pdb_hierarchy = self.xray_structure = None
   self.geometry = None
   self.sequence = None
   self.fmodel = None
   self.f_obs = None
   self.r_free_flags = None
   self.intensity_flag = None
   self.raw_data = None
   self.raw_flags = None
   self.test_flag_value = None
   self.miller_arrays = None
   self.hl_coeffs = None
   self.cif_objects = []
   self.log = out
   if ("--quiet" in args) or ("quiet=True" in args) :
     self.log = null_out()
   elif create_log_buffer :
     self.log = multi_out()
     self.log.register(label="stdout", file_object=out)
     self.log.register(label="log_buffer", file_object=StringIO())
   make_header("Collecting inputs", out=self.log)
   cmdline = iotbx.phil.process_command_line_with_files(
     args=args,
     master_phil=master_phil,
     pdb_file_def="input.pdb.file_name",
     reflection_file_def="input.xray_data.file_name",
     cif_file_def="input.monomers.file_name",
     seq_file_def="input.sequence")
   self.working_phil = cmdline.work
   params = self.working_phil.extract()
   if len(params.input.pdb.file_name) == 0 :
     raise Sorry("At least one PDB file is required as input.")
   self.cif_file_names = params.input.monomers.file_name
   self.pdb_file_names = params.input.pdb.file_name
   # SYMMETRY HANDLING - PDB FILES
   # The first model file that carries symmetry information wins.
   self.crystal_symmetry = pdb_symm = None
   for pdb_file_name in params.input.pdb.file_name :
     pdb_symm = crystal_symmetry_from_any.extract_from(pdb_file_name)
     if (pdb_symm is not None) :
       break
   # DATA INPUT
   data_and_flags = hkl_symm = hkl_in = None
   if (params.input.xray_data.file_name is None) :
     if (require_data) :
       raise Sorry("At least one reflections file is required as input.")
   else :
     # FIXME this may still require that the data file has full crystal
     # symmetry defined (although for MTZ input this will not be a problem)
     make_sub_header("Processing X-ray data", out=self.log)
     hkl_in = file_reader.any_file(params.input.xray_data.file_name)
     hkl_in.check_file_type("hkl")
     hkl_server = hkl_in.file_server
     symm = hkl_server.miller_arrays[0].crystal_symmetry()
     if ((symm is None) or
         (symm.space_group() is None) or
         (symm.unit_cell() is None)) :
       if (pdb_symm is not None) :
         from iotbx.reflection_file_utils import reflection_file_server
         print >> self.log, \
           "No symmetry in X-ray data file - using PDB symmetry:"
         # Write to self.log (not `out`) so that quiet mode and the log
         # buffer see the summary together with the message above.
         pdb_symm.show_summary(f=self.log, prefix="  ")
         hkl_server = reflection_file_server(
           crystal_symmetry=pdb_symm,
           reflection_files=[hkl_in.file_object])
       else :
         raise Sorry("No crystal symmetry information found in input files.")
     if (hkl_server is None) :
       hkl_server = hkl_in.file_server
     data_and_flags = mmtbx.utils.determine_data_and_flags(
       reflection_file_server=hkl_server,
       parameters=params.input.xray_data,
       data_parameter_scope="input.xray_data",
       flags_parameter_scope="input.xray_data.r_free_flags",
       prefer_anomalous=prefer_anomalous,
       force_non_anomalous=force_non_anomalous,
       log=self.log)
     self.intensity_flag = data_and_flags.intensity_flag
     self.raw_data = data_and_flags.raw_data
     self.raw_flags = data_and_flags.raw_flags
     self.test_flag_value = data_and_flags.test_flag_value
     self.f_obs = data_and_flags.f_obs
     self.r_free_flags = data_and_flags.r_free_flags
     self.miller_arrays = hkl_in.file_server.miller_arrays
     hkl_symm = self.raw_data.crystal_symmetry()
   if len(self.cif_file_names) > 0 :
     for file_name in self.cif_file_names :
       cif_obj = mmtbx.monomer_library.server.read_cif(file_name=file_name)
       self.cif_objects.append((file_name, cif_obj))
   # SYMMETRY HANDLING - COMBINED
   from iotbx.symmetry import combine_model_and_data_symmetry
   self.crystal_symmetry = combine_model_and_data_symmetry(
     model_symmetry=pdb_symm,
     data_symmetry=hkl_symm)
   if (self.crystal_symmetry is not None) and (self.f_obs is not None) :
     self.f_obs = self.f_obs.customized_copy(
       crystal_symmetry=self.crystal_symmetry).eliminate_sys_absent().set_info(
         self.f_obs.info())
     self.r_free_flags = self.r_free_flags.customized_copy(
       crystal_symmetry=self.crystal_symmetry).eliminate_sys_absent().set_info(
         self.r_free_flags.info())
   # EXPERIMENTAL PHASES
   target_name = "ml"
   if hasattr(params.input, "experimental_phases") :
     flag = params.input.use_experimental_phases
     if (flag in [True, Auto]) :
       phases_file = params.input.experimental_phases.file_name
       if (phases_file is None) :
         # fall back to the main reflections file
         phases_file = params.input.xray_data.file_name
         phases_in = hkl_in
       else :
         phases_in = file_reader.any_file(phases_file)
         phases_in.check_file_type("hkl")
       phases_in.file_server.err = self.log # redirect error output
       space_group = self.crystal_symmetry.space_group()
       point_group = space_group.build_derived_point_group()
       hl_coeffs = mmtbx.utils.determine_experimental_phases(
         reflection_file_server = phases_in.file_server,
         parameters             = params.input.experimental_phases,
         log                    = self.log,
         parameter_scope        = "input.experimental_phases",
         working_point_group    = point_group,
         symmetry_safety_check  = True)
       if (hl_coeffs is not None) :
         hl_coeffs = hl_coeffs.map_to_asu()
         # make the anomalous flag of the coefficients agree with f_obs
         if hl_coeffs.anomalous_flag() :
           if (not self.f_obs.anomalous_flag()) :
             hl_coeffs = hl_coeffs.average_bijvoet_mates()
         elif self.f_obs.anomalous_flag() :
           hl_coeffs = hl_coeffs.generate_bijvoet_mates()
         self.hl_coeffs = hl_coeffs.matching_set(other=self.f_obs,
           data_substitute=(0,0,0,0))
         target_name = "mlhl"
   # PDB INPUT
   self.unknown_residues_flag = False
   self.unknown_residues_error_message = False
   if process_pdb_file :
     pdb_interp_params = getattr(params, "pdb_interpretation", None)
     if (pdb_interp_params is None) :
       pdb_interp_params = \
         mmtbx.monomer_library.pdb_interpretation.master_params.extract()
     make_sub_header("Processing PDB file(s)", out=self.log)
     pdb_combined = mmtbx.utils.combine_unique_pdb_files(
       file_names=params.input.pdb.file_name,)
     pdb_combined.report_non_unique(out=self.log)
     pdb_raw_records = pdb_combined.raw_records
     processed_pdb_files_srv = mmtbx.utils.process_pdb_file_srv(
       cif_objects=self.cif_objects,
       pdb_interpretation_params=pdb_interp_params,
       crystal_symmetry=self.crystal_symmetry,
       use_neutron_distances=params.input.scattering_table=="neutron",
       stop_for_unknowns=getattr(pdb_interp_params, "stop_for_unknowns",False),
       log=self.log)
     self.processed_pdb_file, self.pdb_inp = \
       processed_pdb_files_srv.process_pdb_files(
         raw_records = pdb_raw_records,
         stop_if_duplicate_labels = False,
         allow_missing_symmetry=\
           (self.crystal_symmetry is None) and (not require_data))
     # Problems are recorded on the object here rather than raised.
     error_msg = self.processed_pdb_file.all_chain_proxies.\
       fatal_problems_message(
         ignore_unknown_scattering_types=False,
         ignore_unknown_nonbonded_energy_types=False)
     if (error_msg is not None) :
       self.unknown_residues_flag = True
       self.unknown_residues_error_message = error_msg
     self.geometry = self.processed_pdb_file.geometry_restraints_manager(
       show_energies=False)
     assert (self.geometry is not None)
     self.xray_structure = self.processed_pdb_file.xray_structure()
     chain_proxies = self.processed_pdb_file.all_chain_proxies
     self.pdb_hierarchy = chain_proxies.pdb_hierarchy
   else :
     pdb_file_object = mmtbx.utils.pdb_file(
       pdb_file_names=params.input.pdb.file_name,
       cif_objects=self.cif_objects,
       crystal_symmetry=self.crystal_symmetry,
       log=self.log)
     self.pdb_inp = pdb_file_object.pdb_inp
     self.pdb_hierarchy = self.pdb_inp.construct_hierarchy()
     if (remove_unknown_scatterers) :
       known_sel = self.pdb_hierarchy.atom_selection_cache().selection(
         "not element X")
       if (known_sel.count(True) != len(known_sel)) :
         self.pdb_hierarchy = self.pdb_hierarchy.select(known_sel)
         self.xray_structure = self.pdb_hierarchy.extract_xray_structure(
           crystal_symmetry=self.crystal_symmetry)
     self.pdb_hierarchy.atoms().reset_i_seq()
     if (self.xray_structure is None) :
       self.xray_structure = self.pdb_inp.xray_structure_simple(
         crystal_symmetry=self.crystal_symmetry)
   # wavelength
   if (params.input.energy is not None) :
     if (params.input.wavelength is not None) :
       raise Sorry("Both wavelength and energy have been specified!")
     # presumably an eV -> Angstrom conversion (hc ~ 12398.42 eV*A) -- confirm
     params.input.wavelength = 12398.424468024265 / params.input.energy
   if (set_wavelength_from_model_header and params.input.wavelength is None) :
     wavelength = self.pdb_inp.extract_wavelength()
     if (wavelength is not None) :
       print >> self.log, ""
       print >> self.log, "Using wavelength = %g from PDB header" % wavelength
       params.input.wavelength = wavelength
   # set scattering table
   if (data_and_flags is not None) :
     self.xray_structure.scattering_type_registry(
       d_min=self.f_obs.d_min(),
       table=params.input.scattering_table)
     if ((params.input.wavelength is not None) and
         (set_inelastic_form_factors is not None)) :
       self.xray_structure.set_inelastic_form_factors(
         photon=params.input.wavelength,
         table=set_inelastic_form_factors)
     make_sub_header("xray_structure summary", out=self.log)
     self.xray_structure.scattering_type_registry().show(out = self.log)
     self.xray_structure.show_summary(f=self.log)
   # FMODEL SETUP
   if (create_fmodel) and (data_and_flags is not None) :
     make_sub_header("F(model) initialization", out=self.log)
     skip_twin_detection = getattr(params.input, "skip_twin_detection", None)
     twin_law = getattr(params.input, "twin_law", None)
     if (twin_law is Auto) :
       if (self.hl_coeffs is not None) :
         raise Sorry("Automatic twin law determination not supported when "+
           "experimental phases are used.")
     elif (skip_twin_detection is not None) :
       twin_law = Auto
     if (twin_law is Auto) :
       print >> self.log, "Twinning will be detected automatically."
       self.fmodel = mmtbx.utils.fmodel_simple(
         xray_structures=[self.xray_structure],
         scattering_table=params.input.scattering_table,
         f_obs=self.f_obs,
         r_free_flags=self.r_free_flags,
         skip_twin_detection=skip_twin_detection,
         target_name=target_name,
         log=self.log)
     else :
       if ((twin_law is not None) and (self.hl_coeffs is not None)) :
         raise Sorry("Automatic twin law determination not supported when "+
           "experimental phases are used.")
       # Use the local twin_law, which was read with a getattr() default:
       # it equals params.input.twin_law whenever that attribute exists, and
       # avoids an AttributeError when the PHIL scope does not define it.
       self.fmodel = mmtbx.utils.fmodel_manager(
         f_obs=self.f_obs,
         xray_structure=self.xray_structure,
         r_free_flags=self.r_free_flags,
         twin_law=twin_law,
         hl_coeff=self.hl_coeffs,
         target_name=target_name)
       self.fmodel.update_all_scales(
         params=None,
         log=self.log,
         optimize_mask=True,
         show=True)
     self.fmodel.info().show_rfactors_targets_scales_overall(out=self.log)
   # SEQUENCE
   if (params.input.sequence is not None) :
     seq_file = file_reader.any_file(params.input.sequence,
       force_type="seq",
       raise_sorry_if_errors=True)
     self.sequence = seq_file.file_object
   # UNMERGED DATA
   self.unmerged_i_obs = None
   if hasattr(params.input, "unmerged_data") :
     if (params.input.unmerged_data.file_name is not None) :
       self.unmerged_i_obs = load_and_validate_unmerged_data(
         f_obs=self.f_obs,
         file_name=params.input.unmerged_data.file_name,
         data_labels=params.input.unmerged_data.labels,
         log=self.log)
   self.params = params
   print >> self.log, ""
   print >> self.log, "End of input processing"
def run (args, out=sys.stdout) :
  """
  Build an ensemble of models related to one protein chain of the input
  model and, if requested, write it out as a multi-model PDB file.

  A reference hierarchy is assembled from the first protein chain that
  matches params.chain_id (or the first protein chain at all when no chain ID
  is given), keeping only the first atom group of each residue group when its
  altloc is blank or 'A'.  The search sequence comes from the sequence file
  if one was supplied, otherwise from the selected chain.

  :param args: command-line arguments (model file, optional sequence file,
    PHIL parameters)
  :param out: stream for log messages
  :returns: the multi-model hierarchy from ensemble.as_multi_model_hierarchy()
  :raises Sorry: if no model was specified or no protein sequence could be
    determined
  """
  from mmtbx.building import make_library
  import iotbx.phil
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil_string=master_phil_str,
    pdb_file_def="model",
    seq_file_def="sequence",
    directory_def="search_directory",
    usage_string="mmtbx.get_related_ensemble [model.pdb] [seq.fa] [...]")
  params = cmdline.work.extract()
  sequence = None
  if (params.model is None) :
    raise Sorry("No model (PDB or mmCIF file) was specified.")
  if (params.sequence is not None) :
    seq_file = cmdline.get_file(params.sequence, force_type="seq")
    n_seqs = len(seq_file.file_object)
    if (n_seqs > 1) :
      print >> out, "%d sequences in file - will only use the first" % n_seqs
    sequence = seq_file.file_object[0].sequence
  pdb_file = cmdline.get_file(params.model, force_type="pdb")
  hierarchy = pdb_file.file_object.hierarchy
  reference_hierarchy = iotbx.pdb.hierarchy.root()
  model = iotbx.pdb.hierarchy.model()
  reference_hierarchy.append_model(model)
  for chain in hierarchy.models()[0].chains() :
    if (params.chain_id is None) or (chain.id == params.chain_id) :
      if (not chain.is_protein()) :
        # only warn when the user explicitly asked for this chain
        if (chain.id == params.chain_id) :
          print >> out, \
            "warning: matching chain '%s' is not protein, skipping" % \
            chain.id
        continue
      else :
        # TODO select based on sequence if provided
        new_chain = iotbx.pdb.hierarchy.chain(id=chain.id)
        model.append_chain(new_chain)
        # get rid of alternate conformations
        for residue_group in chain.residue_groups() :
          atom_group = residue_group.atom_groups()[0]
          if (atom_group.altloc.strip() not in ['', 'A']) :
            continue
          new_rg = iotbx.pdb.hierarchy.residue_group(
            resseq=residue_group.resseq,
            icode=residue_group.icode)
          new_ag = atom_group.detached_copy()
          new_ag.altloc = ''
          new_rg.append_atom_group(new_ag)
          new_chain.append_residue_group(new_rg)
        if (sequence is None) :
          sequence = chain.as_padded_sequence(pad='X')
          print >> out, "Using sequence of chain '%s' (approx. %d residues)" % \
            (chain.id, len(sequence))
        # only the first acceptable chain is used as the reference
        break
  if (sequence is None) :
    raise Sorry("No protein sequence could be extracted based on these inputs.")
  make_sub_header("Finding related models and generating ensemble", out=out)
  ensemble = make_library.extract_and_superpose(
    reference_hierarchy=reference_hierarchy,
    search_directory=params.search_directory,
    sequence=sequence,
    params=params,
    out=out)
  # With no output file, writes are sent to null_out().
  f = null_out()
  if (params.output_file is not None) :
    f = open(params.output_file, "w")
  # try/finally guarantees the file handle is released even if assembling or
  # writing the ensemble raises.
  try :
    print >> out, "Assembling moved models:"
    ensemble_hierarchy = ensemble.as_multi_model_hierarchy()
    for k in ensemble.selection_moved :
      source_info = ensemble.related_chains[k].source_info
      print >> out, "  Model %d: %s:%s" % (k+1, source_info,
        ensemble.related_chains[k].chain_id)
      f.write("REMARK model %d is from %s\n" % (k+1, source_info))
    f.write(ensemble_hierarchy.as_pdb_string())
  finally :
    f.close()
  return ensemble_hierarchy
def build_cycle (pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    params,
    selection=None,
    cif_objects=(),
    nproc=Auto,
    out=sys.stdout,
    verbose=False,
    debug=None,
    i_cycle=0) :
  """
  Run one cycle of alternate-conformation building: pre-filter candidate
  residues, build and score alternates, spread them to adjacent residues,
  optionally run real-space refinement, and finalize the model.

  Hydrogen atoms are always removed first (from the hierarchy in place, and
  from the fmodel's xray structure and the restraints manager), and
  non-blank segids are reset to blank.

  :param pdb_hierarchy: model hierarchy; modified in place
  :param fmodel: fmodel manager whose xray structure matches pdb_hierarchy
  :param geometry_restraints_manager: geometry restraints for the model
  :param params: parameter object; the prefilter, residue_fitting, cleanup
    and nproc attributes are read here
  :param selection: optional atom selection; a string is converted with the
    hierarchy's atom selection cache
  :param cif_objects: passed on to real_space_refine
  :param nproc: not used in this function; params.nproc is passed on instead
  :param out: stream for log output
  :param verbose: passed on to the filtering, building and scoring steps
  :param debug: if not None and > 0, starting map coefficients are written
    to cycle_<N>_start.mtz; also passed on to find_all_alternates
  :param i_cycle: zero-based cycle index, used for headers and file names
  :returns: tuple (pdb_hierarchy, n_alternates); the hierarchy is the one
    returned by real_space_refine when that step is run
  """
  from mmtbx import restraints
  from scitbx.array_family import flex
  t_start = time.time()
  hd_sel = fmodel.xray_structure.hd_selection()
  n_hydrogen = hd_sel.count(True)
  # Hydrogens are removed unconditionally (there is no switch for this).
  if (n_hydrogen > 0) :
    print >> out, "WARNING: %d hydrogen atoms will be removed!" % n_hydrogen
    non_hd_sel = ~hd_sel
    # XXX it's better to do this in-place for the hierarchy, because calling
    # pdb_hierarchy.select(non_hd_sel) will not remove parent-child
    # relationships involving hydrogens, which causes problems when running
    # the MolProbity validation.
    pdb_hierarchy.remove_hd(reset_i_seq=True)
    xray_structure = fmodel.xray_structure.select(non_hd_sel)
    assert (pdb_hierarchy.atoms_size() == xray_structure.scatterers().size())
    fmodel.update_xray_structure(xray_structure)
    geometry_restraints_manager = geometry_restraints_manager.select(non_hd_sel)
  pdb_atoms = pdb_hierarchy.atoms()
  segids = pdb_atoms.extract_segid().strip()
  if (not segids.all_eq("")) :
    print >> out, "WARNING: resetting segids to blank"
    for i_seq, atom in enumerate(pdb_atoms) :
      atom.segid = ""
      # relabel the matching scatterer from the updated atom ID string
      sc = fmodel.xray_structure.scatterers()[i_seq]
      sc.label = atom.id_str()
  if isinstance(selection, str) :
    sele_cache = pdb_hierarchy.atom_selection_cache()
    selection = sele_cache.selection(selection)
  make_header("Build cycle %d" % (i_cycle+1), out=out)
  fmodel.info().show_rfactors_targets_scales_overall(out=out)
  # debug defaults to None; test for that explicitly instead of relying on
  # how None orders against an integer.
  if (debug is not None) and (debug > 0) :
    from mmtbx.maps.utils import get_maps_from_fmodel
    from iotbx.map_tools import write_map_coeffs
    two_fofc, fofc = get_maps_from_fmodel(fmodel,
      exclude_free_r_reflections=True)
    write_map_coeffs(
      fwt_coeffs=two_fofc,
      delfwt_coeffs=fofc,
      file_name="cycle_%d_start.mtz" % (i_cycle+1))
  candidate_residues = alt_confs.filter_before_build(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    selection=selection,
    params=params.prefilter,
    verbose=verbose,
    log=out)
  t1 = time.time()
  print >> out, "filtering: %.3fs" % (t1-t_start)
  restraints_manager = restraints.manager(
    geometry=geometry_restraints_manager,
    normalization=True)
  make_sub_header("Finding alternate conformations", out=out)
  building_trials = find_all_alternates(
    residues=candidate_residues,
    pdb_hierarchy=pdb_hierarchy,
    restraints_manager=restraints_manager,
    fmodel=fmodel,
    params=params.residue_fitting,
    nproc=params.nproc,
    verbose=verbose,
    debug=debug,
    log=out).results
  t2 = time.time()
  print >> out, "  building: %.3fs" % (t2-t1)
  make_sub_header("Scoring and assembling alternates", out=out)
  n_alternates = process_results(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    residues_in=candidate_residues,
    building_trials=building_trials,
    params=params.residue_fitting,
    verbose=verbose,
    log=out)
  if (n_alternates > 0) :
    print >> out, ""
    print >> out, "  %d disordered residues built" % n_alternates
    n_split = alt_confs.spread_alternates(pdb_hierarchy,
      new_occupancy=params.residue_fitting.expected_occupancy,
      split_all_adjacent=True,
      log=out)
    assert (n_split > 0)
    print >> out, "  %d adjacent residues split" % n_split
  else :
    print >> out, "No alternates built this round."
  t3 = time.time()
  print >> out, "  assembly: %.3fs" % (t3-t2)
  if (not params.cleanup.rsr_after_build) :
    if (n_alternates > 0) :
      print >> out, "Skipping final RSR step (rsr_after_build=False)."
    else :
      print >> out, "No refinement needs to be performed."
  else :
    make_sub_header("Real-space refinement", out=out)
    print >> out, ""
    pdb_hierarchy = real_space_refine(
      pdb_hierarchy=pdb_hierarchy,
      fmodel=fmodel,
      cif_objects=cif_objects,
      params=params,
      nproc=params.nproc,
      remediate=True,
      out=out)
    t4 = time.time()
    print >> out, ""
    print >> out, "RSR: %.3fs" % (t4-t3)
  fmodel.info().show_targets(out=out, text="Rebuilt model")
  alt_confs.finalize_model(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=pdb_hierarchy.extract_xray_structure(
      crystal_symmetry=fmodel.xray_structure),
    set_b_iso=params.cleanup.set_b_iso,
    convert_to_isotropic=params.cleanup.convert_to_isotropic,
    selection="altloc A or altloc B")
  # the cycle runtime includes the finalization step above
  t_end = time.time()
  print >> out, "Total runtime for cycle: %.3fs" % (t_end-t_start)
  return pdb_hierarchy, n_alternates
  def set_rotamer_to_reference(self,
                               xray_structure,
                               mon_lib_srv=None,
                               log=None,
                               quiet=False):
    """
    Change side-chain rotamers in the working model so that they match the
    matching residues of the reference model(s).

    A residue is changed when its rotamer is an OUTLIER in the working model
    but not in the reference, or, if self.params.strict_rotamer_matching is
    set, when neither is an outlier but the rotamer names differ.  Residues
    with more than one conformer and PRO residues are skipped.  After each
    change the coordinates are written back with
    xray_structure.set_sites_cart().

    :param xray_structure: structure whose sites_cart() are read at the start
      and updated as residues are changed
    :param mon_lib_srv: monomer library server, stored as self.mon_lib_srv
      only if that attribute is still None
    :param log: output stream; defaults to sys.stdout
    :param quiet: not referenced in the code shown here
    :returns: None (returns early if the working model has no outliers)
    """
    if self.mon_lib_srv is None:
      self.mon_lib_srv = mon_lib_srv
    assert isinstance(self.mon_lib_srv, mmtbx.monomer_library.server.server)
    if(log is None): log = sys.stdout
    make_sub_header(
      "Correcting rotamer outliers to match reference model",
      out=log)
    sa = SidechainAngles(False)
    # rotamer analysis of the working model and of every reference hierarchy
    r = rotalyze.rotalyze(pdb_hierarchy=self.pdb_hierarchy)
    rot_list_reference = {}
    # NOTE(review): coot_reference is never used in the code shown here
    coot_reference = {}
    for key in self.pdb_hierarchy_ref.keys():
      hierarchy = self.pdb_hierarchy_ref[key]
      rot_list_reference[key] = \
        rotalyze.rotalyze(pdb_hierarchy=hierarchy)
    # residue id string -> rotamer name (model_hash / reference_hash)
    # residue id string -> chi angles (model_chis / reference_chis)
    # the reference dictionaries are additionally keyed by reference file
    model_hash = {}
    model_chis = {}
    reference_hash = {}
    reference_chis = {}
    model_outliers = 0
    for rot in r.results:
      model_hash[rot.id_str()] = rot.rotamer_name
      if rot.rotamer_name == "OUTLIER":
        model_outliers += 1

    for key in rot_list_reference.keys():
      reference_hash[key] = {}
      for rot in rot_list_reference[key].results:
        reference_hash[key][rot.id_str()] = rot.rotamer_name

    print >> log, "** evaluating rotamers for working model **"
    # measure chi angles for every atom group of the working model
    for model in self.pdb_hierarchy.models():
      for chain in model.chains():
        for residue_group in chain.residue_groups():
            all_dict = rotalyze.construct_complete_sidechain(residue_group)
            for atom_group in residue_group.atom_groups():
              try:
                atom_dict = all_dict.get(atom_group.altloc)
                chis = sa.measureChiAngles(atom_group, atom_dict)
                if chis is not None:
                  key = utils.id_str(
                          chain_id=chain.id,
                          resseq=residue_group.resseq,
                          resname=atom_group.resname,
                          icode=residue_group.icode,
                          altloc=atom_group.altloc)
                  model_chis[key] = chis
              # any failure here is reported as missing side-chain atoms
              except Exception:
                print >> log, \
                  '  %s%5s %s is missing some sidechain atoms, **skipping**' % (
                      chain.id, residue_group.resid(),
                      atom_group.altloc+atom_group.resname)
    # nothing to fix: stop before analysing the reference model(s)
    if model_outliers == 0:
      print >> log, "No rotamer outliers detected in working model"
      return
    else:
      print >> log, "Number of rotamer outliers: %d" % model_outliers

    print >> log, "\n** evaluating rotamers for reference model **"
    # same chi-angle measurement, repeated for each reference hierarchy
    for file in self.pdb_hierarchy_ref.keys():
      hierarchy = self.pdb_hierarchy_ref[file]
      reference_chis[file] = {}
      for model in hierarchy.models():
        for chain in model.chains():
          for residue_group in chain.residue_groups():
              all_dict = rotalyze.construct_complete_sidechain(residue_group)
              for atom_group in residue_group.atom_groups():
                try:
                  atom_dict = all_dict.get(atom_group.altloc)
                  chis = sa.measureChiAngles(atom_group, atom_dict)
                  if chis is not None:
                    key = utils.id_str(
                            chain_id=chain.id,
                            resseq=residue_group.resseq,
                            resname=atom_group.resname,
                            icode=residue_group.icode,
                            altloc=atom_group.altloc)
                    reference_chis[file][key] = chis
                except Exception:
                  print >> log, \
                    '  %s%5s %s is missing some sidechain atoms, **skipping**' % (
                        chain.id, residue_group.resid(),
                        atom_group.altloc+atom_group.resname)

    print >> log, "\n** fixing outliers **"
    # sites_cart_start is handed to change_residue_rotamer_in_place() and
    # then written back to xray_structure after every changed residue
    sites_cart_start = xray_structure.sites_cart()
    for model in self.pdb_hierarchy.models():
      for chain in model.chains():
        for residue_group in chain.residue_groups():
          if len(residue_group.conformers()) > 1:
            print >> log, "  %s%5s %s has multiple conformations, **skipping**" % (
              chain.id, residue_group.resid(),
              " "+residue_group.atom_groups()[0].resname)
            continue
          for conformer in residue_group.conformers():
            for residue in conformer.residues():
              if residue.resname == "PRO":
                continue
              # key into model_hash / model_chis
              key = utils.id_str(
                      chain_id=chain.id,
                      resseq=residue_group.resseq,
                      resname=residue_group.atom_groups()[0].resname,
                      icode=residue_group.icode,
                      altloc=conformer.altloc)
              # one-character chain IDs are left-padded with a space
              if len(chain.id) == 1:
                chain_id = " "+chain.id
              else:
                chain_id = chain.id
              # key into self.residue_match_hash: resname + chain + resid
              file_key = '%s%s%s' %(residue.resname,
                                    chain_id,
                                    residue_group.resid())
              file_key = file_key.strip()
              file_match = self.residue_match_hash.get(file_key)
              # file_match[0] indexes the per-reference dictionaries;
              # file_match[1] is converted with one_key_to_another() below
              if file_match is not None:
                file = file_match[0]
              else:
                continue
              model_rot = model_hash.get(key)
              reference_rot = reference_hash[file].get(self.one_key_to_another(file_match[1]))
              m_chis = model_chis.get(key)
              r_chis = reference_chis[file].get(self.one_key_to_another(file_match[1]))
              # act only when rotamer names and chi angles exist on both sides
              if model_rot is not None and reference_rot is not None and \
                  m_chis is not None and r_chis is not None:
                if (model_rot == 'OUTLIER' and \
                    reference_rot != 'OUTLIER'): # or \
                    #atom_group.resname in ["LEU", "VAL", "THR"]:
                  self.change_residue_rotamer_in_place(
                      sites_cart_start,residue, m_chis,r_chis,self.mon_lib_srv)
                  xray_structure.set_sites_cart(sites_cart_start)

                # strict mode: also change valid rotamers that differ
                elif self.params.strict_rotamer_matching and \
                  (model_rot != 'OUTLIER' and reference_rot != 'OUTLIER'):
                  if model_rot != reference_rot:
                    self.change_residue_rotamer_in_place(
                        sites_cart_start,residue, m_chis,r_chis,self.mon_lib_srv)
                    xray_structure.set_sites_cart(sites_cart_start)