Ejemplo n.º 1
0
  def __init__(self, fmodel, pdb_hierarchy, cc_min=0.8,
               molprobity_map_params=None):
    """
    Initialise the per-category result lists and run the residue-level
    real-space correlation calculation.

    fmodel, pdb_hierarchy: forwarded unchanged to
      real_space_correlation.simple().
    cc_min: not read in the portion of the method shown here.
    molprobity_map_params: optional; when not None its map_file_name,
      map_coefficients_file_name and map_coefficients_label attributes are
      copied onto the correlation parameters.

    Exceptions raised while preparing the parameters or running the
    calculation propagate to the caller unchanged.
    """

    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    # No try/except around this: the former handler only re-raised, and its
    # "except Exception, e" spelling does not parse on Python 3.
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    if molprobity_map_params is not None:
      rsc_params.map_file_name = molprobity_map_params.map_file_name
      rsc_params.map_coefficients_file_name = \
        molprobity_map_params.map_coefficients_file_name
      rsc_params.map_coefficients_label = \
        molprobity_map_params.map_coefficients_label
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
Ejemplo n.º 2
0
  def __init__(self, fmodel, pdb_hierarchy, cc_min=0.8):
    """
    Initialise the per-category result lists and run the residue-level
    real-space correlation calculation.

    fmodel, pdb_hierarchy: forwarded unchanged to
      real_space_correlation.simple().
    cc_min: not read in the portion of the method shown here.

    Raises RuntimeError with the message "Error: <original message>" when
    preparing the parameters or running the calculation fails.
    """

    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()

    try:
      rsc_params = real_space_correlation.master_params().extract()
      rsc_params.detail = "residue"
      rsc_params.map_1.fill_missing_reflections = False
      rsc_params.map_2.fill_missing_reflections = False
      rsc = real_space_correlation.simple(
        fmodel=fmodel,
        pdb_hierarchy=pdb_hierarchy,
        params=rsc_params,
        log=null_out())
    except Exception as e:
      # Raising a bare string is itself a TypeError on Python >= 2.6, which
      # hid the real message; wrap it in a proper exception instead.
      raise RuntimeError("Error: %s" % str(e))
Ejemplo n.º 3
0
  def __init__(self, fmodel, pdb_hierarchy, cc_min=0.8,
               molprobity_map_params=None):
    """
    Create the empty result lists and compute the per-residue real-space
    correlation.

    fmodel, pdb_hierarchy: handed as-is to real_space_correlation.simple().
    cc_min: not read in the portion of the method shown here.
    molprobity_map_params: optional; if supplied, map_file_name,
      map_coefficients_file_name and map_coefficients_label are taken from
      it and stored on the correlation parameters.

    Errors from the parameter set-up or the calculation are not intercepted;
    they reach the caller with their original traceback.
    """

    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    # The previous "except Exception, e: raise e" wrapper added nothing except
    # a truncated traceback on Python 2 and a syntax error on Python 3.
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    if molprobity_map_params is not None:
      rsc_params.map_file_name = molprobity_map_params.map_file_name
      rsc_params.map_coefficients_file_name = \
        molprobity_map_params.map_coefficients_file_name
      rsc_params.map_coefficients_label = \
        molprobity_map_params.map_coefficients_label
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
Ejemplo n.º 4
0
  def __init__(self, pdb_hierarchy):
    """
    Run reduce in -BUILD mode on the model and record every residue whose
    USER MOD line carries the ":FLIP" marker as an nqh_flip outlier.

    pdb_hierarchy: model to analyse; it is serialised with as_pdb_string()
      and piped to reduce, and later used to look up flip coordinates.

    Raises Sorry when the residue identifier in a flip line is neither 15
    (chain format) nor 17 (segid format) characters long.
    """
    flip_marker = re.compile(":FLIP")
    validation.__init__(self)
    pdb_text = pdb_hierarchy.as_pdb_string()
    reduce_out = run_reduce_with_timeout(
      parameters="-BUILD -",
      stdin_lines=pdb_text)
    check_and_report_reduce_failure(
      fb_object=reduce_out,
      input_lines=pdb_text,
      output_fname="reduce_fail.pdb")
    from mmtbx.validation import utils
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)
    # Example lines emitted by reduce:
    # chain format (2-char chain)
    #USER  MOD Set 1.1: B  49 GLN     :FLIP  amide:sc=    -2.7! C(o=-5.8!,f=-1.3!)
    # segid format (4-char segid)
    #USER  MOD Set 1.1:B     49 GLN     :FLIP  amide:sc=    -2.7! C(o=-5.8!,f=-1.3!)
    for line in reduce_out.stdout_lines:
      if not flip_marker.search(line):
        continue
      resid = line.split(":")[1]
      # reduce writes a different-width identifier for chains and segids
      id_width = len(resid)
      if id_width == 15:
        # chain format: blank chain ids are kept as a single space
        segid = None
        chain_id = resid[0:2].strip() or ' '
        rest = resid[2:]
      elif id_width == 17:
        # segid format
        chain_id = None
        segid = resid[0:4].strip()
        rest = resid[4:]
      else:
        raise Sorry("unexpected length of residue identifier in reduce USER MODs.")
      resname = rest[5:8]

      assert (resname in ["ASN", "GLN", "HIS"])
      flip = nqh_flip(
        chain_id=chain_id,
        segid=segid,
        resseq=rest[0:4].strip(),
        icode=rest[4:5],
        altloc=rest[12:13],
        resname=resname,
        outlier=True)
      flip.set_coordinates_from_hierarchy(pdb_hierarchy)
      self.results.append(flip)
      self.n_outliers += 1
Ejemplo n.º 5
0
 def __init__ (self, pdb_hierarchy) :
   """
   Pipe the model through "phenix.reduce -BUILD -" and store one nqh_flip
   result per stderr line that contains ":FLIP".

   pdb_hierarchy: model to analyse; also used to attach coordinates to each
     flip.
   """
   flip_marker = re.compile(":FLIP")
   validation.__init__(self)
   pdb_text = pdb_hierarchy.as_pdb_string()
   reduce_out = easy_run.fully_buffered("phenix.reduce -BUILD -",
     stdin_lines=pdb_text)
   # Fixed-column layout of the lines of interest:
   #orientation 4: A  68 HIS     :FLIP no HD1: bump=-0.607, HB=0.998, total=0.390
   for line in reduce_out.stderr_lines :
     if flip_marker.search(line) is None :
       continue
     resname = line[22:25]
     assert (resname in ["ASN", "GLN", "HIS"])
     chain_id = line[15:17].strip()
     resseq = line[17:21].strip()
     icode = line[21]
     altloc = line[29]
     flip = nqh_flip(
       chain_id=chain_id,
       resseq=resseq,
       icode=icode,
       altloc=altloc,
       resname=resname)
     flip.set_coordinates_from_hierarchy(pdb_hierarchy)
     self.results.append(flip)
     self.n_outliers += 1
Ejemplo n.º 6
0
 def __init__(self,
              pdb_atoms,
              sites_cart,
              energies_sites,
              restraint_proxies,
              unit_cell,
              ignore_hd=True,
              sigma_cutoff=4.0,
              outliers_only=True,
              use_segids_in_place_of_chainids=False):
     """
     Collect summary statistics and outliers for one restraint type.

     The attribute names looked up on energies_sites are derived from
     self.restraint_type: "<type>_deviations" (called, yields min/max/mean),
     "<type>_residual_sum", "n_<type>_proxies" and, when present,
     "<type>_deviations_z" (called, yields z_min/z_max/z_mean).

     pdb_atoms, sites_cart, restraint_proxies, unit_cell, sigma_cutoff,
     outliers_only, use_segids_in_place_of_chainids: forwarded to
       self.get_outliers().
     ignore_hd: not read by this method.

     Sets self.min/max/mean, self.z_min/z_max/z_mean (None when no z method
     exists), self.n_total, self.target (residual sum per proxy, 0 when
     there are no proxies), self.results (sorted outliers) and
     self.n_outliers.
     """
     validation.__init__(self)
     self.z_min = self.z_max = self.z_mean = None
     deviations_method = getattr(energies_sites,
                                 "%s_deviations" % self.restraint_type)
     self.min, self.max, self.mean = deviations_method()
     target = getattr(energies_sites,
                      "%s_residual_sum" % self.restraint_type)
     self.n_total = getattr(energies_sites,
                            "n_%s_proxies" % self.restraint_type)
     if (self.n_total > 0):
         self.target = target / self.n_total
     else:
         self.target = 0
     deviations_z_method = getattr(energies_sites,
                                   "%s_deviations_z" % self.restraint_type,
                                   None)
     if (deviations_z_method is not None):
         # evaluate once; the method used to be invoked twice with the first
         # result thrown away
         self.z_min, self.z_max, self.z_mean = deviations_z_method()
     self.results = sorted(
         self.get_outliers(
             proxies=restraint_proxies,
             unit_cell=unit_cell,
             sites_cart=sites_cart,
             pdb_atoms=pdb_atoms,
             sigma_cutoff=sigma_cutoff,
             outliers_only=outliers_only,
             use_segids_in_place_of_chainids=use_segids_in_place_of_chainids
         ))
     self.n_outliers = len(self.results)
Ejemplo n.º 7
0
 def __init__ (self,
     pdb_atoms,
     sites_cart,
     energies_sites,
     restraint_proxies,
     unit_cell,
     ignore_hd=True,
     sigma_cutoff=4.0,
     outliers_only=True,
     use_segids_in_place_of_chainids=False) :
   """
   Gather deviation statistics and the outlier list for the restraint type
   named by self.restraint_type.

   energies_sites is queried by attribute name: "<type>_deviations" and the
   optional "<type>_deviations_z" are called and unpacked into three values
   each; "<type>_residual_sum" and "n_<type>_proxies" are read as values.

   pdb_atoms, sites_cart, restraint_proxies, unit_cell, sigma_cutoff,
   outliers_only, use_segids_in_place_of_chainids: passed on to
     self.get_outliers().
   ignore_hd: not read by this method.

   self.target is the residual sum divided by the proxy count, or 0 when
   the count is not positive.  self.results holds the sorted outliers and
   self.n_outliers their number.
   """
   validation.__init__(self)
   self.z_min = self.z_max = self.z_mean = None
   deviations_method = getattr(energies_sites, "%s_deviations" %
     self.restraint_type)
   self.min, self.max, self.mean = deviations_method()
   target = getattr(energies_sites, "%s_residual_sum" %
     self.restraint_type)
   self.n_total = getattr(energies_sites, "n_%s_proxies" %
     self.restraint_type)
   if (self.n_total > 0) :
     self.target = target / self.n_total
   else :
     self.target = 0
   deviations_z_method = getattr(energies_sites, "%s_deviations_z" %
     self.restraint_type, None)
   if (deviations_z_method is not None) :
     # single call; previously the method ran twice and the first result
     # was discarded
     self.z_min, self.z_max, self.z_mean = deviations_z_method()
   self.results = sorted(self.get_outliers(
     proxies=restraint_proxies,
     unit_cell=unit_cell,
     sites_cart=sites_cart,
     pdb_atoms=pdb_atoms,
     sigma_cutoff=sigma_cutoff,
     outliers_only=outliers_only,
     use_segids_in_place_of_chainids=use_segids_in_place_of_chainids))
   self.n_outliers = len(self.results)
Ejemplo n.º 8
0
 def __init__(self, pdb_hierarchy):
     """
     Feed the model to "phenix.reduce -BUILD -" and register an nqh_flip
     outlier for each stdout line containing ":FLIP".

     pdb_hierarchy: model to analyse; also used to set the coordinates of
         every recorded flip.
     """
     flip_marker = re.compile(":FLIP")
     validation.__init__(self)
     pdb_text = pdb_hierarchy.as_pdb_string()
     reduce_out = easy_run.fully_buffered("phenix.reduce -BUILD -",
                                          stdin_lines=pdb_text)
     # Layout of the lines being parsed:
     #USER  MOD Set 1.1: B  49 GLN     :FLIP  amide:sc=    -2.7! C(o=-5.8!,f=-1.3!)
     for line in reduce_out.stdout_lines:
         if flip_marker.search(line) is None:
             continue
         # second colon-separated field is the residue identifier
         fields = line.split(":")[1]
         # a blank chain id is kept as a single space
         chain_id = fields[0:2].strip() or ' '
         resname = fields[7:10]
         assert (resname in ["ASN", "GLN", "HIS"])
         flip = nqh_flip(
             chain_id=chain_id,
             resseq=fields[2:6].strip(),
             icode=fields[6:7],
             altloc=fields[14:15],
             resname=resname,
             outlier=True)
         flip.set_coordinates_from_hierarchy(pdb_hierarchy)
         self.results.append(flip)
         self.n_outliers += 1
Ejemplo n.º 9
0
 def __init__ (self, pdb_hierarchy) :
   """
   Run "phenix.reduce -BUILD -" on the model and turn each ":FLIP" line of
   its stdout into an nqh_flip outlier.

   pdb_hierarchy: model to analyse; each flip also takes its coordinates
     from it.
   """
   pattern = re.compile(":FLIP")
   validation.__init__(self)
   reduce_out = easy_run.fully_buffered("phenix.reduce -BUILD -",
     stdin_lines=pdb_hierarchy.as_pdb_string())
   # Only lines of this shape are of interest:
   #USER  MOD Set 1.1: B  49 GLN     :FLIP  amide:sc=    -2.7! C(o=-5.8!,f=-1.3!)
   flip_lines = (l for l in reduce_out.stdout_lines if pattern.search(l))
   for line in flip_lines :
     ident = line.split(":")[1]
     chain_id = ident[0:2].strip()
     if not chain_id :
       # blank chain ids are represented by one space
       chain_id = ' '
     resname = ident[7:10]
     assert (resname in ["ASN", "GLN", "HIS"])
     flip_args = dict(
       chain_id=chain_id,
       resseq=ident[2:6].strip(),
       icode=ident[6:7],
       altloc=ident[14:15],
       resname=resname,
       outlier=True)
     flip = nqh_flip(**flip_args)
     flip.set_coordinates_from_hierarchy(pdb_hierarchy)
     self.results.append(flip)
     self.n_outliers += 1
Ejemplo n.º 10
0
    def __init__(self,
                 pdb_hierarchy,
                 xray_structure,
                 ignore_hd=True,
                 collect_outliers=True):
        """
        Compute occupancy and B-factor statistics for a model and, optionally,
        collect the individual outliers.

        pdb_hierarchy: model hierarchy; its atoms get their i_seq reset and
            are indexed in parallel with the scatterers of xray_structure.
        xray_structure: must contain at least one scatterer (asserted).
        ignore_hd: when true and the structure contains both hydrogen and
            non-hydrogen scatterers, the summary statistics are computed on
            the non-hydrogen selection and hydrogens are skipped during
            outlier collection.
        collect_outliers: when false, only the summary statistics and
            counters are filled in.

        Outliers are appended to self.zero_occ, self.partial_occ,
        self.different_occ and self.bad_adps; self.n_outliers counts them in
        addition to anisotropic hydrogens and non-positive-definite ADPs.
        """
        for name in self.__slots__:
            setattr(self, name, None)
        validation.__init__(self)
        assert len(xray_structure.scatterers()) != 0
        from cctbx import adptbx
        from scitbx.array_family import flex
        xrs = xray_structure
        self.n_total = xrs.scatterers().size()  # always include H/D
        self.results = None
        pdb_atoms = pdb_hierarchy.atoms()
        pdb_atoms.reset_i_seq()
        hd_selection = xrs.hd_selection()
        subtract_hd = True
        self.n_all = hd_selection.size()
        self.n_hd = hd_selection.count(True)
        if (ignore_hd) and (0 < self.n_hd < self.n_all):
            xrs = xrs.select(~hd_selection)
            subtract_hd = False
        u_isos = xrs.extract_u_iso_or_u_equiv()
        occ = xrs.scatterers().extract_occupancies()
        self.n_atoms = xrs.scatterers().size()
        self.n_non_hd = self.n_all - self.n_hd
        self.n_aniso = xrs.use_u_aniso().count(True)
        self.n_aniso_h = (xray_structure.use_u_aniso()
                          & hd_selection).count(True)
        self.n_npd = xrs.is_positive_definite_u().count(False)
        self.n_zero_b = (u_isos == 0).count(True)
        self.n_zero_occ = (occ == 0).count(True)
        # Displacement cutoffs: mean +/- 4 sigma of the non-zero values; with
        # fewer than two such values nothing is flagged by the cutoffs.
        u_cutoff_high = sys.maxsize
        u_cutoff_low = 0
        u_non_zero = u_isos.select(u_isos > 0)
        if (len(u_non_zero) > 1):
            mv = flex.mean_and_variance(u_non_zero)
            sigma = mv.unweighted_sample_standard_deviation()
            u_cutoff_high = mv.mean() + (4.0 * sigma)
            u_cutoff_low = mv.mean() - (4.0 * sigma)
        self.b_mean = adptbx.u_as_b(flex.mean(u_isos))
        self.b_min = adptbx.u_as_b(flex.min(u_isos))
        self.b_max = adptbx.u_as_b(flex.max(u_isos))
        self.o_mean = flex.mean(occ)
        self.o_min = flex.min(occ)
        self.o_max = flex.max(occ)
        self.n_outliers = self.n_aniso_h + self.n_npd
        self.zero_occ = []
        self.partial_occ = []
        self.different_occ = []
        self.bad_adps = []
        self.b_histogram = None  # TODO

        def is_u_iso_outlier(u):
            return (u < u_cutoff_low) or (u > u_cutoff_high) or (u <= 0)

        # these statistics cover all atoms!
        occupancies = xray_structure.scatterers().extract_occupancies()
        u_isos = xray_structure.extract_u_iso_or_u_equiv()
        # marks atoms already reported as part of a residue-level outlier
        collected = flex.bool(occupancies.size(), False)
        if (collect_outliers):
            for i_seq, occ in enumerate(occupancies):
                if (hd_selection[i_seq] and ignore_hd) or collected[i_seq]:
                    continue
                pdb_atom = pdb_atoms[i_seq]
                parent = pdb_atom.parent()
                if (occ <= 0):
                    group_atoms = parent.atoms()
                    labels = pdb_atom.fetch_labels()
                    if (len(group_atoms) > 1) and (
                            group_atoms.extract_occ().all_eq(0)):
                        # the whole group has zero occupancy: report it once
                        i_seqs = group_atoms.extract_i_seq()
                        b_mean = adptbx.u_as_b(flex.mean(
                            u_isos.select(i_seqs)))
                        outlier = residue_occupancy(
                            chain_id=labels.chain_id,
                            resseq=labels.resseq,
                            icode=labels.icode,
                            altloc=labels.altloc,
                            resname=labels.resname,
                            occupancy=occ,
                            outlier=True,
                            xyz=group_atoms.extract_xyz().mean(),
                            b_iso=b_mean)
                        self.zero_occ.append(outlier)
                        self.n_outliers += 1
                        collected.set_selected(i_seqs, True)
                    else:
                        assert (pdb_atom.occ == occ), "%s: %s <--> %s" % (
                            pdb_atom.id_str(), pdb_atom.occ, occ)
                        outlier = atom_occupancy(pdb_atom=pdb_atom,
                                                 occupancy=occ,
                                                 b_iso=adptbx.u_as_b(
                                                     u_isos[i_seq]),
                                                 xyz=pdb_atom.xyz,
                                                 outlier=True)
                        self.zero_occ.append(outlier)
                        self.n_outliers += 1
                elif is_u_iso_outlier(u_isos[i_seq]):
                    # zero displacements will always be recorded on a per-atom basis
                    if (u_isos[i_seq] <= 0):
                        outlier = atom_bfactor(pdb_atom=pdb_atom,
                                               occupancy=occ,
                                               b_iso=adptbx.u_as_b(
                                                   u_isos[i_seq]),
                                               xyz=pdb_atom.xyz,
                                               outlier=True)
                        self.bad_adps.append(outlier)
                        self.n_outliers += 1
                    else:
                        # if the average displacement for the entire residue falls outside
                        # the cutoffs, save as a single residue outlier
                        group_atoms = parent.atoms()
                        i_seqs = group_atoms.extract_i_seq()
                        u_mean = flex.mean(u_isos.select(i_seqs))
                        if is_u_iso_outlier(u_mean):
                            labels = pdb_atom.fetch_labels()
                            outlier = residue_bfactor(
                                chain_id=labels.chain_id,
                                resseq=labels.resseq,
                                icode=labels.icode,
                                altloc=labels.altloc,
                                resname=labels.resname,
                                occupancy=occ,
                                outlier=True,
                                xyz=group_atoms.extract_xyz().mean(),
                                b_iso=adptbx.u_as_b(u_mean))
                            self.bad_adps.append(outlier)
                            self.n_outliers += 1
                            collected.set_selected(i_seqs, True)
                        # otherwise, just save this atom
                        else:
                            outlier = atom_bfactor(pdb_atom=pdb_atom,
                                                   occupancy=occ,
                                                   b_iso=adptbx.u_as_b(
                                                       u_isos[i_seq]),
                                                   xyz=pdb_atom.xyz,
                                                   outlier=True)
                            self.bad_adps.append(outlier)
                            self.n_outliers += 1

            # analyze occupancies for first model
            model = pdb_hierarchy.models()[0]
            for chain in model.chains():
                for residue_group in chain.residue_groups():
                    # get unique set of atom names
                    atom_names = set()
                    for atom in residue_group.atoms():
                        atom_names.add(atom.name.strip())

                    # check total occupancy for each atom
                    for name in atom_names:
                        occupancy = 0.0
                        atoms = list()
                        for atom_group in residue_group.atom_groups():
                            atom = atom_group.get_atom(name)
                            if (atom is not None):
                                occupancy += atom.occ
                                atoms.append(atom)

                        if (not approx_equal(
                                occupancy, 1.0, out=None, eps=1.0e-3)):
                            for atom in atoms:
                                outlier = atom_occupancy(pdb_atom=atom,
                                                         occupancy=atom.occ,
                                                         b_iso=adptbx.u_as_b(
                                                             atom.b),
                                                         xyz=atom.xyz,
                                                         outlier=True)
                                self.partial_occ.append(outlier)
                                self.n_outliers += 1

                    # check that atoms in an atom group have the same occupancy
                    # (only the first inconsistent atom group of a residue
                    # group is reported; the loops stop after it)
                    for atom_group in residue_group.atom_groups():
                        residue_is_okay = True
                        base_occupancy = atom_group.atoms()[0].occ
                        for atom in atom_group.atoms():
                            if (not approx_equal(
                                    base_occupancy, atom.occ, out=None,
                                    eps=1.0e-3)):
                                labels = atom.fetch_labels()
                                i_seqs = atom_group.atoms().extract_i_seq()
                                b_mean = adptbx.u_as_b(
                                    flex.mean(u_isos.select(i_seqs)))
                                # Report the group's reference occupancy.
                                # This used to pass "occ", a leftover from
                                # the per-scatterer loop above that belongs
                                # to an unrelated atom.
                                outlier = residue_occupancy(
                                    chain_id=labels.chain_id,
                                    resseq=labels.resseq,
                                    icode=labels.icode,
                                    altloc=labels.altloc,
                                    resname=labels.resname,
                                    occupancy=base_occupancy,
                                    outlier=True,
                                    xyz=atom_group.atoms().extract_xyz().mean(
                                    ),
                                    b_iso=b_mean)
                                self.different_occ.append(outlier)
                                self.n_outliers += 1
                                residue_is_okay = False
                                break
                        if (not residue_is_okay):
                            break
Ejemplo n.º 11
0
  def __init__(self,
      pdb_hierarchy,
      nontrans_only=False,
      out=sys.stdout,
      quiet=True):
    """
    Evaluate the omega dihedral for consecutive residues of every chain in
    every model and record the results.

    pdb_hierarchy: model to analyse; atom i_seqs are reset first when they
      are all zero.
    nontrans_only: when true, only cis/twisted results are stored in
      self.results (counters are still updated for every omega found).
    out, quiet: not read by this method.

    Updates self.residue_count (indexed by OMEGA_GENERAL / OMEGA_PRO),
    self.omega_count (same index, then OMEGALYZE_TRANS / OMEGALYZE_CIS /
    OMEGALYZE_TWISTED), self.n_outliers, self.results and
    self._outlier_i_seqs.
    """
    validation.__init__(self)
    self.residue_count = [0, 0]
    #[OMEGA_GENERAL, OMEGA_PRO]
    self.omega_count = [[0,0,0], [0,0,0]]
    #[OMEGA_GENERAL, OMEGA_PRO], then
    #[OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED]

    from mmtbx.validation import utils
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if all_i_seqs.all_eq(0):
      pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)

    for model in pdb_hierarchy.models():
      for chain in model.chains():
        # per-chain state: nothing carries over from the previous chain
        prev_rezes = None
        cur_resseq = None
        if use_segids:
          chain_id = utils.get_segid_as_chainid(chain=chain)
        else:
          chain_id = chain.id
        residues = list(chain.residue_groups())
        for i, residue_group in enumerate(residues):
          # construct_residues() output is looked up by altloc below, so the
          # current conformer's atoms can be paired with the matching (or,
          # failing that, the first) conformer of the previous residue.
          prev_atom_list, atom_list = None, None
          if cur_resseq is not None:
            prev_rezes = rezes
          rezes = construct_residues(residue_group)
          cur_resseq = residue_group.resseq_as_int()
          cur_icode = residue_group.icode.strip()
          if (i > 0):
            #check for insertion codes
            if (cur_resseq == residues[i-1].resseq_as_int()):
              # same number and no insertion code on either: skip
              if (cur_icode == '') and (residues[i-1].icode.strip() == ''):
                continue
            elif (cur_resseq != (residues[i-1].resseq_as_int())+1):
              # numbering gap: skip
              continue
          for atom_group in residue_group.atom_groups():
            alt_conf = atom_group.altloc
            if rezes is not None:
              atom_list = rezes.get(alt_conf)
            if prev_rezes is not None:
              prev_atom_list = prev_rezes.get(alt_conf)
              if (prev_atom_list is None):
                # no matching altloc in the previous residue: fall back to
                # its first key in sorted order
                prev_keys = sorted(prev_rezes.keys())
                prev_atom_list = prev_rezes.get(prev_keys[0])
            omega = get_omega(prev_atom_list, atom_list)
            highest_mc_b = get_highest_mc_b(prev_atom_list, atom_list)
            if omega is not None:
              resname = atom_group.resname[0:3]
              coords = get_center(atom_group)
              if resname == "PRO":
                res_type = OMEGA_PRO
              else:
                res_type = OMEGA_GENERAL
              self.residue_count[res_type] += 1
              omega_type = find_omega_type(omega)
              is_nontrans = False
              if omega_type == OMEGALYZE_CIS or omega_type == OMEGALYZE_TWISTED:
                self.n_outliers += 1
                is_nontrans = True
              self.omega_count[res_type][omega_type] += 1
              markup_atoms = [None, None, None, None] #for kinemage markup
              if is_nontrans:
                # slots: previous CA, previous C, current N, current CA
                for a in prev_atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "CA":
                    markup_atoms[0] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "C":
                    markup_atoms[1] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                for a in atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "N":
                    markup_atoms[2] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "CA":
                    markup_atoms[3] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
              #find prev res identities for printing
              prev_alts = []
              prev_resnames = {}
              for ag in residues[i-1].atom_groups():
                prev_alts.append(ag.altloc)
                prev_resnames[ag.altloc] = ag.resname
              if alt_conf in prev_alts:
                prev_altloc = alt_conf
              else:
                if len(prev_alts) > 1:
                  prev_altloc = prev_alts[1]
                else:
                  prev_altloc = prev_alts[0]
              prev_resname = prev_resnames[prev_altloc]
              #done finding prev res identities
              result = omega_result(
                chain_id=chain_id,
                resseq=residue_group.resseq,
                icode=residue_group.icode,
                resname=atom_group.resname,
                altloc=atom_group.altloc,
                prev_resseq=residues[i-1].resseq,
                prev_icode=residues[i-1].icode,
                prev_resname=prev_resname,
                prev_altloc=prev_altloc,
                segid=None,
                omega=omega,
                omega_type=omega_type,
                res_type=res_type,
                is_nontrans=is_nontrans,
                outlier=is_nontrans,
                highest_mc_b=highest_mc_b,
                xyz=coords,
                markup_atoms=markup_atoms)
              if is_nontrans or not nontrans_only: #(not nontrans_only or is_nontrans)
                self.results.append(result)
              if is_nontrans:
                i_seqs = atom_group.atoms().extract_i_seq()
                assert (not i_seqs.all_eq(0)) #This assert copied from ramalyze
                self._outlier_i_seqs.extend(i_seqs)
Ejemplo n.º 12
0
  def __init__(self,
      pdb_hierarchy,
      nontrans_only=False,
      out=sys.stdout,
      quiet=True):
    """
    Walk every chain of every model, compute omega between each residue and
    its predecessor, and store an omega_result per atom group for which
    get_omega() returns a value.

    pdb_hierarchy: model to analyse; atom i_seqs are reset when all are 0.
    nontrans_only: when true, only cis/twisted results go into self.results.
    out, quiet: not read by this method.

    Side effects on self: residue_count, omega_count, n_outliers, results and
    _outlier_i_seqs are filled in.
    """
    validation.__init__(self)
    self.residue_count = [0, 0]
    #[OMEGA_GENERAL, OMEGA_PRO]
    self.omega_count = [[0,0,0], [0,0,0]]
    #[OMEGA_GENERAL, OMEGA_PRO], then
    #[OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED]

    from mmtbx.validation import utils
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if all_i_seqs.all_eq(0):
      pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)

    # NOTE(review): next_rezes, prev_resid and next_resseq are assigned here
    # and again per chain but never read in this method.
    prev_rezes, next_rezes = None, None
    prev_resid = None
    cur_resseq = None
    next_resseq = None
    for model in pdb_hierarchy.models():
      for chain in model.chains():
        # reset the loop-carried state so nothing leaks across chains
        prev_rezes, next_rezes = None, None
        prev_resid = None
        cur_resseq = None
        next_resseq = None
        if use_segids:
          chain_id = utils.get_segid_as_chainid(chain=chain)
        else:
          chain_id = chain.id
        residues = list(chain.residue_groups())
        for i, residue_group in enumerate(residues):
          # The reason I pass lists of atom_groups to get_phi and get_psi is to
          # deal with the particular issue where some residues have an A alt
          # conf that needs some atoms from a "" alt conf to get calculated
          # correctly.  See 1jxt.pdb for examples.  This way I can search both
          # the alt conf atoms and the "" atoms if necessary.
          # NOTE(review): get_phi/get_psi are not called here; the comment
          # above presumably predates this method's use of get_omega.
          prev_atom_list, next_atom_list, atom_list = None, None, None
          # cur_resseq is None only for the first residue of a chain, so
          # rezes is never read before it has been assigned for this chain
          if cur_resseq is not None:
            prev_rezes = rezes
            prev_resseq = cur_resseq
          rezes = construct_residues(residues[i])
          cur_resseq = residue_group.resseq_as_int()
          cur_icode = residue_group.icode.strip()
          if (i > 0):
            #check for insertion codes
            if (cur_resseq == residues[i-1].resseq_as_int()) :
              # same residue number with no insertion code on either side
              if (cur_icode == '') and (residues[i-1].icode.strip() == '') :
                continue
            # residue numbers are not consecutive: skip this residue
            elif (cur_resseq != (residues[i-1].resseq_as_int())+1):
              continue
          for atom_group in residue_group.atom_groups():
            alt_conf = atom_group.altloc
            if rezes is not None:
              atom_list = rezes.get(alt_conf)
            if prev_rezes is not None:
              prev_atom_list = prev_rezes.get(alt_conf)
              # previous residue has no such altloc: use its first key in
              # sorted order instead
              if (prev_atom_list is None):
                prev_keys = sorted(prev_rezes.keys())
                prev_atom_list = prev_rezes.get(prev_keys[0])
            omega=get_omega(prev_atom_list, atom_list)
            highest_mc_b = get_highest_mc_b(prev_atom_list, atom_list)
            if omega is not None:
              resname = atom_group.resname[0:3]
              coords = get_center(atom_group)
              if resname == "PRO":
                res_type = OMEGA_PRO
              else:
                res_type = OMEGA_GENERAL
              self.residue_count[res_type] += 1
              omega_type = find_omega_type(omega)
              is_nontrans = False
              # cis and twisted both count as outliers
              if omega_type == OMEGALYZE_CIS or omega_type == OMEGALYZE_TWISTED:
                self.n_outliers += 1
                is_nontrans = True
              self.omega_count[res_type][omega_type] += 1
              markup_atoms = [None, None, None, None] #for kinemage markup
              if is_nontrans:
                # slots 0/1: CA and C taken from the previous residue's atoms
                for a in prev_atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "CA":
                    markup_atoms[0] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "C":
                    markup_atoms[1] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                # slots 2/3: N and CA taken from the current residue's atoms
                for a in atom_list:
                  if a is None: continue
                  a_ = atom(pdb_atom=a)
                  if a.name.strip() == "N":
                    markup_atoms[2] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                  elif a.name.strip() == "CA":
                    markup_atoms[3] = kin_atom(
                      id_str=a_.atom_group_id_str(),xyz=a_.xyz)
                #------------
              #prevres=residues[i-1]
              #find prev res identities for printing
              prev_alts = []
              prev_resnames = {}
              for ag in residues[i-1].atom_groups():
                prev_alts.append(ag.altloc)
                prev_resnames[ag.altloc] = ag.resname
              if alt_conf in prev_alts:
                prev_altloc = alt_conf
              else:
                # no matching altloc: take the second atom group's altloc
                # when there is more than one, otherwise the only one
                if len(prev_alts) > 1:
                  prev_altloc = prev_alts[1]
                else:
                  prev_altloc = prev_alts[0]
              prev_resname = prev_resnames[prev_altloc]
              #done finding prev res identities
              # NOTE(review): segid is always passed as None, even when
              # use_segids is true (chain_id then carries the segid).
              result = omega_result(
                chain_id=chain_id,
                resseq=residue_group.resseq,
                icode=residue_group.icode,
                resname=atom_group.resname,
                altloc=atom_group.altloc,
                prev_resseq=residues[i-1].resseq,
                prev_icode=residues[i-1].icode,
                prev_resname=prev_resname,
                prev_altloc=prev_altloc,
                segid=None,
                omega=omega,
                omega_type=omega_type,
                res_type=res_type,
                is_nontrans=is_nontrans,
                outlier=is_nontrans,
                highest_mc_b=highest_mc_b,
                xyz=coords,
                markup_atoms=markup_atoms)
              if is_nontrans or not nontrans_only: #(not nontrans_only or is_nontrans)
                self.results.append(result)
              if is_nontrans:
                i_seqs = atom_group.atoms().extract_i_seq()
                assert (not i_seqs.all_eq(0)) #This assert copied from ramalyze
                self._outlier_i_seqs.extend(i_seqs)
Ejemplo n.º 13
0
  def __init__(self, pdb_hierarchy,
      data_version="8000",
      outliers_only=False,
      show_errors=False,
      out=sys.stdout,
      quiet=False):
    """
    Evaluate the side-chain rotamer of every atom group in pdb_hierarchy.

    One rotamer result is built per atom group whose chi angles can be
    measured and scored; results are appended to self.results and
    self.n_total is incremented for every scored atom group.

    Parameters:
      pdb_hierarchy: hierarchy whose models, chains, residue groups and
        atom groups are walked.
      data_version: rotamer data set passed to RotamerEval; only "8000"
        is accepted.
      outliers_only: when true, only results for which is_outlier() is
        true are stored.
      show_errors: passed to SidechainAngles; when true, atom groups whose
        chi measurement raises AttributeError are reported on `out` and
        stored as results marked incomplete.
      out: stream that receives the "missing some sidechain atoms" lines.
      quiet: accepted for interface compatibility; not used in this method.

    Raises:
      ValueError: if data_version is not "8000".
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    from mmtbx.rotamer.sidechain_angles import SidechainAngles
    from mmtbx.rotamer import rotamer_eval
    from mmtbx.validation import utils
    self.data_version = data_version
    # Only the "8000" data set is handled (the "500" branch is disabled).
    if self.data_version != "8000":
      raise ValueError(
        "data_version given to RotamerEval not recognized (%s)." % data_version)
    self.outlier_threshold = OUTLIER_THRESHOLD
    sidechain_angles = SidechainAngles(show_errors)
    rotamer_evaluator = rotamer_eval.RotamerEval(
                             data_version=data_version)
    rotamer_id = rotamer_eval.RotamerID() # loads in the rotamer names
    use_segids = utils.use_segids_in_place_of_chainids(
                   hierarchy=pdb_hierarchy)
    for model in pdb_hierarchy.models():
      for chain in model.chains():
        if use_segids:
          chain_id = utils.get_segid_as_chainid(chain=chain)
        else:
          chain_id = chain.id
        for rg in chain.residue_groups():
          # Looked up by altloc below to get the atoms for each atom group.
          all_dict = construct_complete_sidechain(rg)
          for atom_group in rg.atom_groups():
            coords = get_center(atom_group)
            resname = atom_group.resname
            occupancy = get_occupancy(atom_group)
            kwargs = {
              "chain_id" : chain_id,
              "resseq" : rg.resseq,
              "icode" : rg.icode,
              "altloc" : atom_group.altloc,
              "resname" : resname,
              "xyz" : coords,
              "occupancy" : occupancy,
            }
            atom_dict = all_dict.get(atom_group.altloc)
            try:
              chis = sidechain_angles.measureChiAngles(
                       atom_group,
                       atom_dict)
            except AttributeError:
              # Treated as "side chain incomplete": optionally report it,
              # then move on to the next atom group.
              if show_errors:
                kwargs['incomplete'] = True
                result = rotamer(**kwargs)
                print >> out, '%s is missing some sidechain atoms' % \
                  result.id_str()
                self.results.append(result)
              continue
            # Nothing to score without a complete set of chi angles.
            if (chis is None) or (None in chis):
              continue
            cur_res = resname.lower().strip()
            # MSE is scored with the MET distribution.
            if cur_res == 'mse':
              cur_res = 'met'
            value = rotamer_evaluator.evaluate(cur_res, chis)
            if value is None:
              continue
            self.n_total += 1
            kwargs['score'] = value * 100
            wrap_chis = rotamer_id.wrap_chis(resname.strip(), chis,
              symmetry=False)
            evaluation = self.evaluateScore(value)
            kwargs['evaluation'] = evaluation
            if evaluation == "OUTLIER":
              kwargs['outlier'] = True
              kwargs['rotamer_name'] = evaluation
            else:
              kwargs['outlier'] = False
              kwargs['rotamer_name'] = rotamer_id.identify(resname,
                wrap_chis)
              #deal with unclassified rotamers
              if kwargs['rotamer_name'] == '':
                kwargs['rotamer_name'] = "UNCLASSIFIED"
            # Results always carry four chi slots; pad the unused ones.
            while (len(wrap_chis) < 4):
              wrap_chis.append(None)
            kwargs['chi_angles'] = wrap_chis
            result = rotamer(**kwargs)
            if (result.is_outlier()) or (not outliers_only):
              self.results.append(result)
    out_count, out_percent = self.get_outliers_count_and_fraction()
    self.out_percent = out_percent * 100.0
Ejemplo n.º 14
0
 def __init__(self,
              pdb_hierarchy,
              outliers_only=False,
              out=sys.stdout,
              collect_ideal=False,
              quiet=False):
     """
     Compute the C-beta deviation of every non-glycine residue that has
     N, CA, C and CB atoms, and collect cbeta results.

     A residue is analysed when it belongs to the first conformer of its
     residue group or when any of its four relevant atoms sits in an atom
     group with a non-empty altloc.  Deviations of 0.25 or more are
     outliers.

     Parameters:
       pdb_hierarchy: hierarchy whose models/chains/residue groups/
         conformers are walked.
       outliers_only: when equal to False every analysed residue is
         stored; otherwise only outliers are stored.
       out: accepted for interface compatibility; not used in this method.
       collect_ideal: when true, the ideal CB position of each stored
         result is recorded in self.beta_ideal keyed by result.id_str().
       quiet: accepted for interface compatibility; not used in this method.

     Side effects:
       Fills self.results, self.n_outliers, self.stats (plain and
       occupancy-weighted counts), self._outlier_i_seqs and, optionally,
       self.beta_ideal.
     """
     validation.__init__(self)
     self._outlier_i_seqs = flex.size_t()
     self.beta_ideal = {}
     relevant_atom_names = {
         " CA ": None,
         " N  ": None,
         " C  ": None,
         " CB ": None
     }  # FUTURE: set
     outlier_cutoff = 0.25
     self.stats = group_args(n_results=0,
                             n_weighted_results=0,
                             n_weighted_outliers=0)
     from mmtbx.validation import utils
     use_segids = utils.use_segids_in_place_of_chainids(
         hierarchy=pdb_hierarchy)
     for model in pdb_hierarchy.models():
         for chain in model.chains():
             if use_segids:
                 chain_id = utils.get_segid_as_chainid(chain=chain)
             else:
                 chain_id = chain.id
             for rg in chain.residue_groups():
                 for i_cf, cf in enumerate(rg.conformers()):
                     for residue in cf.residues():
                         if (residue.resname == "GLY"):
                             continue
                         is_first = (i_cf == 0)
                         is_alt_conf = False
                         relevant_atoms = {}
                         for atom in residue.atoms():
                             if (atom.name in relevant_atom_names):
                                 relevant_atoms[atom.name] = atom
                                 if (len(atom.parent().altloc) != 0):
                                     is_alt_conf = True
                         # Skip repeats of unchanged residues in later
                         # conformers and residues missing a needed atom.
                         if (not (is_first or is_alt_conf)
                                 or len(relevant_atoms) != 4):
                             continue
                         result = calculate_ideal_and_deviation(
                             relevant_atoms=relevant_atoms,
                             resname=residue.resname)
                         dev = result.deviation
                         dihedralNABB = result.dihedral
                         betaxyz = result.ideal
                         if (dev is None):
                             continue
                         resCB = relevant_atoms[" CB "]
                         is_outlier = (dev >= outlier_cutoff)
                         # Statistics cover every analysed residue, even
                         # those filtered out of self.results below.
                         self.stats.n_results += 1
                         self.stats.n_weighted_results += resCB.occ
                         if (is_outlier or outliers_only == False):
                             if (is_outlier):
                                 self.n_outliers += 1
                                 self.stats.n_weighted_outliers += resCB.occ
                                 # Record the CB atom itself; the loop
                                 # variable `atom` would be whichever atom
                                 # of the residue happened to come last.
                                 self._outlier_i_seqs.append(resCB.i_seq)
                             if (is_alt_conf):
                                 altchar = cf.altloc
                             else:
                                 altchar = " "
                             result = cbeta(chain_id=chain_id,
                                            resname=residue.resname,
                                            resseq=residue.resseq,
                                            icode=residue.icode,
                                            altloc=altchar,
                                            xyz=resCB.xyz,
                                            occupancy=resCB.occ,
                                            deviation=dev,
                                            dihedral_NABB=dihedralNABB,
                                            ideal_xyz=betaxyz,
                                            outlier=is_outlier)
                             self.results.append(result)
                             key = result.id_str()
                             if (collect_ideal):
                                 self.beta_ideal[key] = betaxyz
Ejemplo n.º 15
0
 def __init__ (self, pdb_hierarchy, xray_structure, fmodel,
     distance_cutoff=4.0, collect_all=True) :
   """
   Analyse every atom selected by "resname HOH and name O": find its
   nearest non-hydrogen contact (including symmetry copies) and attach
   per-atom map statistics.

   Parameters:
     pdb_hierarchy: hierarchy providing the atoms and the atom selection.
     xray_structure: supplies the unit cell, the pair ASU table, B-factors,
       occupancies and fractional sites, indexed in step with the atoms.
     fmodel: passed to extract_map_stats_for_single_atoms.
     distance_cutoff: cutoff handed to xray_structure.pair_asu_table when
       building the contact table.
     collect_all: when true every analysed water is stored in
       self.results; otherwise only waters flagged as outliers are.

   Side effects:
     Sets self.n_total, self.n_bad, self.n_heavy, self.n_outliers and
     fills self.results.
   """
   validation.__init__(self)
   from mmtbx.real_space_correlation import extract_map_stats_for_single_atoms
   from cctbx import adptbx
   from scitbx.matrix import col
   self.n_bad = 0
   self.n_heavy = 0
   pdb_atoms = pdb_hierarchy.atoms()
   if(len(pdb_atoms)>1):
     assert (not pdb_atoms.extract_i_seq().all_eq(0))
   unit_cell = xray_structure.unit_cell()
   pair_asu_table = xray_structure.pair_asu_table(
     distance_cutoff = distance_cutoff)
   asu_mappings = pair_asu_table.asu_mappings()
   asu_table = pair_asu_table.table()
   u_isos = xray_structure.extract_u_iso_or_u_equiv()
   occupancies = xray_structure.scatterers().extract_occupancies()
   sites_frac = xray_structure.sites_frac()
   sel_cache = pdb_hierarchy.atom_selection_cache()
   water_sel = sel_cache.selection("resname HOH and name O")
   map_stats = extract_map_stats_for_single_atoms(
     pdb_atoms=pdb_atoms,
     xray_structure=xray_structure,
     fmodel=fmodel,
     selection=water_sel)
   n_flagged = 0
   for i_seq, atom in enumerate(pdb_atoms) :
     if (not water_sel[i_seq]) :
       continue
     rt_mx_i_inv = asu_mappings.get_rt_mx(i_seq, 0).inverse()
     self.n_total += 1
     # The water position does not change while scanning its neighbours.
     vec_i = col(atom.xyz)
     nearest_atom = nearest_contact = None
     for j_seq, j_sym_groups in asu_table[i_seq].items() :
       atom_j = pdb_atoms[j_seq]
       # Filter out hydrogens
       if atom_j.element.upper().strip() in ["H", "D"]:
         continue
       site_j = sites_frac[j_seq]
       for j_sym_group in j_sym_groups:
         # Operator combining the water's inverse ASU mapping with the
         # neighbour's mapping; applied to the neighbour's fractional site.
         rt_mx = rt_mx_i_inv.multiply(asu_mappings.get_rt_mx(j_seq,
           j_sym_group[0]))
         site_ji_cart = unit_cell.orthogonalize(rt_mx * site_j)
         dxyz = abs(vec_i - col(site_ji_cart))
         if (nearest_contact is None) or (dxyz < nearest_contact) :
           nearest_contact = dxyz
           nearest_atom = atom_info(pdb_atom=atom_j, symop=rt_mx)
     w = water(
       pdb_atom=atom,
       b_iso=adptbx.u_as_b(u_isos[i_seq]),
       occupancy=occupancies[i_seq],
       nearest_contact=nearest_contact,
       nearest_atom=nearest_atom,
       score=map_stats.two_fofc_ccs[i_seq],
       fmodel=map_stats.fmodel_values[i_seq],
       two_fofc=map_stats.two_fofc_values[i_seq],
       fofc=map_stats.fofc_values[i_seq],
       anom=map_stats.anom_values[i_seq],
       n_hbonds=None) # TODO
     if (w.is_bad_water()) :
       w.outlier = True
       self.n_bad += 1
     elif (w.is_heavy_atom()) :
       w.outlier = True
       self.n_heavy += 1
     if (w.outlier) :
       n_flagged += 1
     if (w.outlier) or (collect_all) :
       self.results.append(w)
   # Count flagged waters only: with collect_all=True self.results also
   # holds the acceptable ones, so its length is not the outlier count.
   self.n_outliers = n_flagged
Ejemplo n.º 16
0
  def __init__ (self,
      pdb_hierarchy,
      outliers_only=False,
      show_errors=False,
      out=sys.stdout,
      quiet=False) :
    """
    Evaluate phi/psi for every residue triple yielded by
    generate_protein_threes and collect ramachandran results.

    Parameters:
      pdb_hierarchy: hierarchy to analyse; its atom i_seqs are reset when
        they are all zero.
      outliers_only: when true, only outlier results are stored.
      show_errors, out, quiet: accepted for interface compatibility; not
        used in this method.

    Side effects:
      Fills self.results (sorted by model id + id_str()), self.n_total,
      self.n_type, self._outlier_i_seqs, and sets self.out_percent and
      self.fav_percent.
    """
    # Optimization hint: make it possible to pass
    # ramachandran_eval.RamachandranEval() from outside.
    # Better - convert this to using mmtbx.model.manager where
    # RamachandranEval is already available.
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [ 0 ] * 6
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if (all_i_seqs.all_eq(0)) :
      pdb_atoms.reset_i_seq()
    # NOTE(review): computed but not consumed below; chain ids are read
    # straight from the hierarchy when results are built.
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)
    # Sets rather than lists: both are only used for membership tests, and
    # a list lookup per residue makes the loop quadratic.
    count_keys = set()
    uniqueness_keys = set()
    rama_eval = ramachandran_eval.RamachandranEval()
    for three in generate_protein_threes(hierarchy=pdb_hierarchy, geometry=None):
      main_residue = three[1]
      phi_psi_atoms = three.get_phi_psi_atoms()
      if phi_psi_atoms is None:
        continue
      phi_atoms, psi_atoms = phi_psi_atoms
      phi = get_dihedral(phi_atoms)
      psi = get_dihedral(psi_atoms)
      coords = get_center(main_residue) #should find the CA of the center residue
      if (phi is None) or (psi is None):
        continue

      # Pick the Ramachandran category; the order of the tests matters
      # (e.g. a PRO followed by PRO is classified as cis/trans-PRO).
      main_resname = main_residue.resname[0:3]
      res_type = RAMA_GENERAL
      if (main_resname == "GLY"):
        res_type = RAMA_GLYCINE
      elif (main_resname == "PRO"):
        if is_cis_peptide(three):
          res_type = RAMA_CISPRO
        else:
          res_type = RAMA_TRANSPRO
      elif (three[2].resname == "PRO"):
        res_type = RAMA_PREPRO
      elif (main_resname == "ILE" or main_resname == "VAL"):
        res_type = RAMA_ILE_VAL
      value = rama_eval.evaluate(res_types[res_type], [phi, psi])
      ramaType = self.evaluateScore(res_type, value)
      is_outlier = ramaType == RAMALYZE_OUTLIER

      # XXX only save kinemage data for outliers
      if is_outlier :
        c_alphas = get_cas_from_three(three)
        assert (len(c_alphas) == 3)
        markup = self.as_markup_for_kinemage(c_alphas)
      else:
        markup = None
      result = ramachandran(
        model_id=main_residue.parent().parent().parent().id,
        chain_id=main_residue.parent().parent().id,
        resseq=main_residue.resseq,
        icode=main_residue.icode,
        resname=main_residue.resname,
        altloc=get_altloc_from_three(three),
        segid=None, # XXX ???
        phi=phi,
        psi=psi,
        rama_type=ramaType,
        res_type=res_type,
        score=value*100,
        outlier=is_outlier,
        xyz=coords,
        markup=markup)
      result_key = result.model_id+result.chain_id+result.resseq+result.icode
      # Each residue position contributes to the totals once, and only via
      # its blank or 'A' alternate.
      if result.altloc in ['','A'] and result_key not in count_keys:
        self.n_total += 1
        self.n_type[res_type] += 1
        self.add_to_validation_counts(ramaType)
        count_keys.add(result_key)
      if (not outliers_only or is_outlier) :
        if (result.altloc != '' or
          result_key not in uniqueness_keys):
          #the threes/conformers method results in some redundant result
          #  calculations in structures with alternates. Using the
          #  uniqueness_keys set prevents redundant results being added to
          #  the final list
          self.results.append(result)
          uniqueness_keys.add(result_key)
      if is_outlier :
        i_seqs = main_residue.atoms().extract_i_seq()
        assert (not i_seqs.all_eq(0))
        self._outlier_i_seqs.extend(i_seqs)
    self.results.sort(key=lambda r: r.model_id+r.id_str())
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0
Ejemplo n.º 17
0
    def __init__(
        self, pdb_hierarchy, data_version="8000", outliers_only=False, show_errors=False, out=sys.stdout, quiet=False
    ):
        """
        Score the side-chain rotamer of each atom group in pdb_hierarchy.

        Atom groups whose chi angles can be measured and evaluated yield a
        rotamer result; self.n_total counts them and self.results stores
        them (all of them, or only outliers when outliers_only is true).
        With show_errors set, atom groups whose chi measurement raises
        AttributeError are reported on `out` and stored as incomplete.

        Only data_version "8000" is accepted; anything else raises
        ValueError.  `quiet` is not used in this method.
        """
        validation.__init__(self)
        self.n_allowed = 0
        self.n_favored = 0
        from mmtbx.rotamer.sidechain_angles import SidechainAngles
        from mmtbx.rotamer import rotamer_eval
        from mmtbx.rotamer.rotamer_eval import RotamerID
        from mmtbx.validation import utils

        self.data_version = data_version
        # Reject unknown data sets up front (the "500" set is disabled).
        if self.data_version != "8000":
            raise ValueError("data_version given to RotamerEval not recognized (%s)." % data_version)
        self.outlier_threshold = 0.003

        angle_measurer = SidechainAngles(show_errors)
        scorer = rotamer_eval.RotamerEval(data_version=data_version)
        namer = rotamer_eval.RotamerID()  # loads in the rotamer names
        segids_as_chain_ids = utils.use_segids_in_place_of_chainids(hierarchy=pdb_hierarchy)
        current_rotamers = {}
        for model in pdb_hierarchy.models():
            for chain in model.chains():
                if segids_as_chain_ids:
                    chain_id = utils.get_segid_as_chainid(chain=chain)
                else:
                    chain_id = chain.id
                for rg in chain.residue_groups():
                    atoms_by_altloc = construct_complete_sidechain(rg)
                    for atom_group in rg.atom_groups():
                        center = get_center(atom_group)
                        resname = atom_group.resname
                        occ = get_occupancy(atom_group)
                        kwargs = dict(
                            chain_id=chain_id,
                            resseq=rg.resseq,
                            icode=rg.icode,
                            altloc=atom_group.altloc,
                            resname=resname,
                            xyz=center,
                            occupancy=occ,
                        )
                        sidechain_atoms = atoms_by_altloc.get(atom_group.altloc)
                        # Result is not consumed in this method.
                        res_key = get_residue_key(atom_group=atom_group)
                        try:
                            chis = angle_measurer.measureChiAngles(atom_group, sidechain_atoms)
                        except AttributeError:
                            # Optionally record the incomplete residue,
                            # then go on to the next atom group.
                            if show_errors:
                                kwargs["incomplete"] = True
                                result = rotamer(**kwargs)
                                print >> out, "%s is missing some sidechain atoms" % result.id_str()
                                self.results.append(result)
                            continue
                        # A full set of chi angles is required for scoring.
                        if (chis is None) or (None in chis):
                            continue
                        lookup_name = resname.lower().strip()
                        if lookup_name == "mse":
                            # MSE is evaluated with the MET distribution.
                            lookup_name = "met"
                        value = scorer.evaluate(lookup_name, chis)
                        if value is None:
                            continue

                        self.n_total += 1
                        kwargs["score"] = value * 100
                        wrapped = namer.wrap_chis(resname.strip(), chis, symmetry=False)
                        # Symmetry-wrapped copy; not consumed in this method.
                        sym_wrapped = namer.wrap_sym(resname.strip(), wrapped[:])
                        evaluation = self.evaluateScore(value)
                        kwargs["evaluation"] = evaluation
                        if evaluation != "OUTLIER":
                            kwargs["outlier"] = False
                            rotamer_name = namer.identify(resname, wrapped)
                            # deal with unclassified rotamers
                            if rotamer_name == "":
                                rotamer_name = "UNCLASSIFIED"
                            kwargs["rotamer_name"] = rotamer_name
                        else:
                            kwargs["outlier"] = True
                            kwargs["rotamer_name"] = evaluation
                        # Pad to four chi slots.
                        wrapped.extend([None] * (4 - len(wrapped)))
                        kwargs["chi_angles"] = wrapped
                        result = rotamer(**kwargs)
                        if (result.is_outlier()) or (not outliers_only):
                            self.results.append(result)
        _, outlier_fraction = self.get_outliers_count_and_fraction()
        self.out_percent = outlier_fraction * 100.0
Ejemplo n.º 18
0
 def __init__ (self, pdb_hierarchy,
     outliers_only=False,
     out=sys.stdout,
     collect_ideal=False,
     quiet=False) :
   """
   Compute the C-beta deviation of every non-glycine residue that has
   N, CA, C and CB atoms, and collect cbeta results.

   A residue is analysed when it belongs to the first conformer of its
   residue group or when any of its four relevant atoms sits in an atom
   group with a non-empty altloc.  Deviations of 0.25 or more are outliers.

   Parameters:
     pdb_hierarchy: hierarchy whose models/chains/residue groups/conformers
       are walked.
     outliers_only: when equal to False every analysed residue is stored;
       otherwise only outliers are stored.
     out: accepted for interface compatibility; not used in this method.
     collect_ideal: when true, the ideal CB position of each stored result
       is recorded in self.beta_ideal keyed by result.id_str().
     quiet: accepted for interface compatibility; not used in this method.

   Side effects:
     Fills self.results, self.n_outliers, self._outlier_i_seqs and,
     optionally, self.beta_ideal.
   """
   validation.__init__(self)
   self._outlier_i_seqs = flex.size_t()
   self.beta_ideal = {}
   relevant_atom_names = {
     " CA ": None, " N  ": None, " C  ": None, " CB ": None} # FUTURE: set
   outlier_cutoff = 0.25
   from mmtbx.validation import utils
   use_segids = utils.use_segids_in_place_of_chainids(
     hierarchy=pdb_hierarchy)
   for model in pdb_hierarchy.models():
     for chain in model.chains():
       if use_segids:
         chain_id = utils.get_segid_as_chainid(chain=chain)
       else:
         chain_id = chain.id
       for rg in chain.residue_groups():
         for i_cf,cf in enumerate(rg.conformers()):
           for residue in cf.residues():
             if (residue.resname == "GLY") :
               continue
             is_first = (i_cf == 0)
             is_alt_conf = False
             relevant_atoms = {}
             for atom in residue.atoms():
               if (atom.name in relevant_atom_names):
                 relevant_atoms[atom.name] = atom
                 if (len(atom.parent().altloc) != 0):
                   is_alt_conf = True
             # Skip repeats of unchanged residues in later conformers and
             # residues missing one of the four needed atoms.
             if (not (is_first or is_alt_conf)) or len(relevant_atoms) != 4:
               continue
             result = calculate_ideal_and_deviation(
               relevant_atoms=relevant_atoms,
               resname=residue.resname)
             dev = result.deviation
             dihedralNABB = result.dihedral
             betaxyz = result.ideal
             if (dev is None) : continue
             is_outlier = (dev >= outlier_cutoff)
             if (is_outlier or outliers_only==False):
               resCB = relevant_atoms[" CB "]
               if (is_outlier):
                 self.n_outliers+=1
                 # Record the CB atom itself; the loop variable `atom`
                 # would be whichever atom of the residue came last.
                 self._outlier_i_seqs.append(resCB.i_seq)
               if (is_alt_conf):
                 altchar = cf.altloc
               else:
                 altchar = " "
               result = cbeta(
                 chain_id=chain_id,
                 resname=residue.resname,
                 resseq=residue.resseq,
                 icode=residue.icode,
                 altloc=altchar,
                 xyz=resCB.xyz,
                 occupancy=resCB.occ,
                 deviation=dev,
                 dihedral_NABB=dihedralNABB,
                 ideal_xyz=betaxyz,
                 outlier=is_outlier)
               self.results.append(result)
               key = result.id_str()
               if (collect_ideal) :
                 self.beta_ideal[key] = betaxyz
Ejemplo n.º 19
0
 def __init__ (self,
     pdb_hierarchy,
     keep_hydrogens=True,
     nuclear=False,
     force_unique_chain_ids=False,
     time_limit=120,
     b_factor_cutoff=None,
     save_probe_unformatted_file=None,
     save_modified_hierarchy=False,
     verbose=False,
     out=sys.stdout) :
   """
   Run the Probe-based clash analysis on each model of pdb_hierarchy.

   For every model, check_and_add_hydrogen produces the input string and a
   probe_clashscore_manager computes the clashscore, the B-factor-filtered
   clashscore and the list of bad clashes.  Per-model values are stored in
   self.clash_dict, self.clash_dict_b_cutoff and self.list_dict keyed by
   model id.  self.results, self.n_outliers, self.clashscore and
   self.clashscore_b_cutoff are taken from a model when there is only one
   model or while self.clashscore is still None.

   Parameters:
     pdb_hierarchy: hierarchy to analyse.
     keep_hydrogens, nuclear, time_limit: forwarded to
       check_and_add_hydrogen (nuclear also to the clashscore manager).
     force_unique_chain_ids: accepted for interface compatibility; not
       used in this method.
     b_factor_cutoff: stored on self and forwarded to the manager.
     save_probe_unformatted_file: if given and the hierarchy has exactly
       one model, the unformatted Probe output is written to this path
       and the path is stored in self.probe_file.
     save_modified_hierarchy: when true, self.pdb_hierarchy is rebuilt
       from the manager's h_pdb_string.
     verbose: print which x-H distance model is used and forward the flag.
     out: log stream passed to check_and_add_hydrogen.

   Raises:
     RuntimeError: if the "probe" module is not available.
   """
   validation.__init__(self)
   self.b_factor_cutoff = b_factor_cutoff
   self.clashscore = None
   self.clashscore_b_cutoff = None
   self.clash_dict = {}
   self.clash_dict_b_cutoff = {}
   self.list_dict = {}
   self.probe_file = None
   if (not libtbx.env.has_module(name="probe")):
     raise RuntimeError(
       "Probe could not be detected on your system.  Please make sure "+
       "Probe is in your path.\nProbe is available at "+
       "http://kinemage.biochem.duke.edu/")
   if verbose:
     # Single-argument call form: same output whether print is the
     # Python 2 statement or the print function.
     if not nuclear:
       print("\nUsing electron cloud x-H distances and vdW radii")
     else:
       print("\nUsing nuclear cloud x-H distances and vdW radii")
   import iotbx.pdb.hierarchy
   from scitbx.array_family import flex
   from mmtbx.validation import utils
   models = pdb_hierarchy.models()
   n_models = len(models)
   use_segids = utils.use_segids_in_place_of_chainids(
                  hierarchy=pdb_hierarchy)
   for i_mod, model in enumerate(models):
     input_str,_ = check_and_add_hydrogen(
       pdb_hierarchy=pdb_hierarchy,
       model_number=i_mod,
       nuclear=nuclear,
       verbose=verbose,
       time_limit=time_limit,
       keep_hydrogens=keep_hydrogens,
       log=out)
     # Largest occupancy is taken from a detached copy of this model
     # placed in its own root.
     r = iotbx.pdb.hierarchy.root()
     mdc = model.detached_copy()
     r.append_model(mdc)
     occ_max = flex.max(r.atoms().extract_occ())
     pcm = probe_clashscore_manager(
       h_pdb_string=input_str,
       nuclear=nuclear,
       largest_occupancy=occ_max,
       b_factor_cutoff=b_factor_cutoff,
       use_segids=use_segids,
       verbose=verbose)
     if (save_modified_hierarchy) :
       self.pdb_hierarchy = iotbx.pdb.hierarchy.input(
         pdb_string=pcm.h_pdb_string).hierarchy
     self.clash_dict[model.id] = pcm.clashscore
     self.clash_dict_b_cutoff[model.id] = pcm.clashscore_b_cutoff
     self.list_dict[model.id] = pcm.bad_clashes
     if (n_models == 1) or (self.clashscore is None) :
       self.results = pcm.bad_clashes
       self.n_outliers = len(self.results)
       self.clashscore = pcm.clashscore
       self.clashscore_b_cutoff = pcm.clashscore_b_cutoff
     if (save_probe_unformatted_file is not None) and (n_models == 1) :
       # Close the file deterministically instead of leaving the handle
       # to the garbage collector.
       with open(save_probe_unformatted_file, "w") as probe_out:
         probe_out.write(pcm.probe_unformatted)
       self.probe_file = save_probe_unformatted_file
Ejemplo n.º 20
0
    def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
        """
        Compute real-space correlation data for `model`.

        When molprobity_map_params supplies a map_file_name, the map is read
        with any_file and map_model_cc is run against it; the overall
        correlations (cc_mask, cc_volume, cc_peaks) go to self.overall_rsc
        and an FSC object to self.fsc.  Otherwise
        real_space_correlation.simple is run with fmodel and the map
        coefficient settings copied from molprobity_map_params (if given).

        Parameters:
          model: provides get_hierarchy() and crystal_symmetry().
          fmodel: used only on the real_space_correlation.simple path.
          cc_min: not referenced in the visible part of this method.
          molprobity_map_params: optional object with map_file_name,
            map_coefficients_file_name, map_coefficients_label and d_min.

        Any exception raised while computing the correlation propagates
        unchanged.
        """

        from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
        from mmtbx import real_space_correlation

        validation.__init__(self)

        pdb_hierarchy = model.get_hierarchy()
        crystal_symmetry = model.crystal_symmetry()

        # arrays for different components
        self.everything = list()
        self.protein = list()
        self.other = list()
        self.water = list()
        aa_codes = one_letter_given_three_letter.keys()

        # redo real_space_corelation.simple to use map objects instead of filenames
        self.overall_rsc = None
        rsc = None
        try:
            rsc_params = real_space_correlation.master_params().extract()
            rsc_params.detail = "residue"
            rsc_params.map_1.fill_missing_reflections = False
            rsc_params.map_2.fill_missing_reflections = False
            use_maps = False
            if (molprobity_map_params is not None):
                rsc_params.map_coefficients_file_name = \
                  molprobity_map_params.map_coefficients_file_name
                rsc_params.map_coefficients_label = \
                  molprobity_map_params.map_coefficients_label
                # A map file takes precedence over map coefficients.
                if (molprobity_map_params.map_file_name is not None):
                    use_maps = True
            # use mmtbx/command_line/map_model_cc.py for maps
            self.fsc = None
            if (use_maps):
                from iotbx import map_and_model
                from mmtbx.maps import map_model_cc
                from mmtbx.command_line.map_model_cc import get_fsc
                from iotbx.file_reader import any_file
                params = map_model_cc.master_params().extract()
                params.map_model_cc.resolution = molprobity_map_params.d_min
                map_object = any_file(
                    molprobity_map_params.map_file_name).file_object

                # check that model crystal symmetry matches map crystal symmetry
                mmi = map_and_model.input(map_data=map_object.map_data(),
                                          model=model)

                rsc_object = map_model_cc.map_model_cc(
                    mmi.map_data(),
                    mmi.model().get_hierarchy(), mmi.crystal_symmetry(),
                    params.map_model_cc)
                rsc_object.validate()
                rsc_object.run()
                rsc = rsc_object.get_results()
                self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)

                self.fsc = get_fsc(mmi.map_data(), mmi.model(),
                                   params.map_model_cc)
                self.fsc.atom_radius = rsc.atom_radius
                # From here on `rsc` holds only the per-residue values.
                rsc = rsc.cc_per_residue
            # mmtbx/real_space_correlation.py for X-ray/neutron data and map
            # coefficients
            else:
                self.overall_rsc, rsc = real_space_correlation.simple(
                    fmodel=fmodel,
                    pdb_hierarchy=pdb_hierarchy,
                    params=rsc_params,
                    log=null_out())
        except Exception:
            # Plain re-raise: failures propagate to the caller unchanged.
            # Written without the Python-2-only "except X, e" form so the
            # method parses on Python 2.6+ and Python 3 alike.
            raise
Ejemplo n.º 21
0
    def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
        """Compute per-residue real-space correlation results for ``model``.

        When ``molprobity_map_params.map_file_name`` is set, the correlation
        is computed against that map with ``mmtbx.maps.map_model_cc`` and the
        result of ``get_fsc`` is stored in ``self.fsc``.  Otherwise
        ``real_space_correlation.simple`` is run with ``fmodel``.

        Parameters:
          model: object providing ``get_hierarchy()``; it is also handed to
            ``iotbx.map_and_model.input`` on the map path.
          fmodel: forwarded to ``real_space_correlation.simple``; not read on
            the map path.
          cc_min: a residue whose correlation is below this value is flagged
            as an outlier.
          molprobity_map_params: optional object whose
            ``map_coefficients_file_name``, ``map_coefficients_label``,
            ``map_file_name`` and ``d_min`` attributes are read.

        Sets ``self.overall_rsc``, ``self.fsc``, ``self.everything``,
        ``self.protein``, ``self.other``, ``self.water`` and ``self.results``
        and increments ``self.n_outliers``.  Exceptions raised by the
        underlying calculations propagate unchanged.
        """
        from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
        from mmtbx import real_space_correlation

        validation.__init__(self)

        pdb_hierarchy = model.get_hierarchy()

        # arrays for different components
        self.everything = list()
        self.protein = list()
        self.other = list()
        self.water = list()

        self.overall_rsc = None
        self.fsc = None
        rsc = None

        # redo real_space_correlation.simple to use map objects instead of
        # filenames
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail = "residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if molprobity_map_params is not None:
            rsc_params.map_coefficients_file_name = \
                molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
                molprobity_map_params.map_coefficients_label
            use_maps = molprobity_map_params.map_file_name is not None

        if use_maps:
            # use mmtbx/command_line/map_model_cc.py for maps
            from iotbx import map_and_model
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(
                molprobity_map_params.map_file_name).file_object

            # check that model crystal symmetry matches map crystal symmetry
            mmi = map_and_model.input(map_data=map_object.map_data(),
                                      model=model)

            rsc_object = map_model_cc.map_model_cc(
                mmi.map_data(),
                mmi.model().get_hierarchy(), mmi.crystal_symmetry(),
                params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            map_results = rsc_object.get_results()
            self.overall_rsc = (map_results.cc_mask, map_results.cc_volume,
                                map_results.cc_peaks)

            self.fsc = get_fsc(mmi.map_data(), mmi.model(),
                               params.map_model_cc)
            self.fsc.atom_radius = map_results.atom_radius
            rsc = map_results.cc_per_residue
        else:
            # mmtbx/real_space_correlation.py for X-ray/neutron data and map
            # coefficients
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())

        assert (self.overall_rsc is not None) and (rsc is not None)
        for result_ in rsc:
            if use_maps:
                # new rsc calculation (mmtbx/maps/model_map_cc.py)
                result = residue_real_space(chain_id=result_.chain_id,
                                            resname=result_.resname,
                                            resseq=result_.resseq,
                                            icode=result_.icode,
                                            altloc="",
                                            score=result_.cc,
                                            b_iso=result_.b_iso_mean,
                                            occupancy=result_.occ_mean,
                                            outlier=result_.cc < cc_min,
                                            xyz=result_.xyz_mean)
            else:
                # old rsc calculation (mmtbx/maps/real_space_correlation.py)
                residue = result_.residue
                result = residue_real_space(
                    chain_id=result_.chain_id,
                    resname=residue.resname,
                    resseq=residue.resseq,
                    icode=residue.icode,
                    altloc="",
                    score=result_.cc,
                    b_iso=result_.b,
                    occupancy=result_.occupancy,
                    fmodel=result_.map_value_1,
                    two_fofc=result_.map_value_2,
                    outlier=result_.cc < cc_min,
                    xyz=residue.atoms().extract_xyz().mean())
            if result.is_outlier():
                self.n_outliers += 1
            # XXX unlike other validation metrics, we always save the results
            # for the real-space correlation, since these are used as the
            # basis for the multi-criterion plot in Phenix.  The show() method
            # will only print outliers, however.
            # NOTE(review): this reads ``result_.residue`` on both paths; it
            # presumes map_model_cc results also carry ``.residue`` - confirm.
            resname = result_.residue.resname
            if resname != 'HOH':  # water is handled by waters.py
                self.everything.append(result)
                if resname in one_letter_given_three_letter:
                    self.protein.append(result)
                else:
                    self.other.append(result)

        # Done once, after the loop, so the aliasing also holds when there
        # are no per-residue results at all.
        self.everything += self.water
        self.results = self.protein
Ejemplo n.º 22
0
  def __init__ (self, fmodel, pdb_hierarchy, crystal_symmetry=None, cc_min=0.8,
                molprobity_map_params=None) :
    """
    Run the real-space correlation calculation for a PDB hierarchy.

    When molprobity_map_params.map_file_name is set, the correlation is
    computed against that map with mmtbx.maps.map_model_cc and the result of
    get_fsc is stored in self.fsc.  Otherwise real_space_correlation.simple
    is run with fmodel.  The overall values are stored in self.overall_rsc.

    Parameters:
      fmodel: forwarded to real_space_correlation.simple; not read on the
        map path.
      pdb_hierarchy: hierarchy the correlation is computed for.
      crystal_symmetry: optional symmetry for the map path.  It is replaced
        by the symmetry read from the map file when it is None or is not
        similar to it.
      cc_min: not read in this portion of the constructor.
      molprobity_map_params: optional object whose
        map_coefficients_file_name, map_coefficients_label, map_file_name
        and d_min attributes are read.

    Exceptions raised by the underlying calculations propagate unchanged.
    """
    from mmtbx import real_space_correlation

    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    self.overall_rsc = None
    self.fsc = None
    rsc = None

    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    use_maps = False
    if (molprobity_map_params is not None) :
      rsc_params.map_coefficients_file_name = \
        molprobity_map_params.map_coefficients_file_name
      rsc_params.map_coefficients_label = \
        molprobity_map_params.map_coefficients_label
      use_maps = (molprobity_map_params.map_file_name is not None)

    if (use_maps) :
      # use mmtbx/command_line/map_model_cc.py for maps
      from scitbx.array_family import flex
      import iotbx.pdb
      import mmtbx.model
      from mmtbx.maps import map_model_cc
      from mmtbx.command_line.map_model_cc import get_fsc
      from iotbx.file_reader import any_file
      from cctbx import crystal, sgtbx
      params = map_model_cc.master_params().extract()
      params.map_model_cc.resolution = molprobity_map_params.d_min
      map_object = any_file(molprobity_map_params.map_file_name).file_object

      # -----------------------------------------------------------------------
      # check that model crystal symmetry matches map crystal symmetry
      # if missing or inconsistent, map parameters take precedence
      # TODO: centralize data consistency checks prior to running validation
      map_crystal_symmetry = crystal.symmetry(
        unit_cell=map_object.unit_cell(),
        space_group=sgtbx.space_group_info(
          map_object.space_group_number).group())
      if ((crystal_symmetry is None) or
          (not map_crystal_symmetry.is_similar_symmetry(crystal_symmetry))) :
        crystal_symmetry = map_crystal_symmetry
      # -----------------------------------------------------------------------

      map_data = map_object.map_data()
      rsc_object = map_model_cc.map_model_cc(
        map_data, pdb_hierarchy, crystal_symmetry, params.map_model_cc)
      rsc_object.validate()
      rsc_object.run()
      map_results = rsc_object.get_results()
      self.overall_rsc = (map_results.cc_mask, map_results.cc_volume,
                          map_results.cc_peaks)

      # pdb_hierarchy.as_pdb_input is being phased out since that function
      # just re-processes the file from text and can be lossy
      # this is a placeholder until tools get updated to use the model class
      pdb_input = iotbx.pdb.input(
        source_info='pdb_hierarchy',
        lines=flex.split_lines(pdb_hierarchy.as_pdb_string()))
      model = mmtbx.model.manager(model_input = pdb_input)
      self.fsc = get_fsc(map_data, model, params.map_model_cc)

      self.fsc.atom_radius = map_results.atom_radius
      rsc = map_results.cc_per_residue
    else :
      # mmtbx/real_space_correlation.py for X-ray/neutron data and map
      # coefficients
      self.overall_rsc, rsc = real_space_correlation.simple(
        fmodel=fmodel,
        pdb_hierarchy=pdb_hierarchy,
        params=rsc_params,
        log=null_out())
Ejemplo n.º 23
0
  def __init__ (self, pdb_hierarchy, xray_structure, ignore_hd=True,
      collect_outliers=True) :
    """
    Collect B-factor and occupancy statistics, plus optional outlier records.

    Parameters:
      pdb_hierarchy: hierarchy whose atoms are looked up by scatterer index
        (i_seq is reset here); its first model is scanned for occupancy
        inconsistencies.
      xray_structure: structure supplying scatterers, u_iso values and
        occupancies; must contain at least one scatterer.
      ignore_hd: when True and the structure mixes H/D with other atoms,
        the summary statistics are computed without H/D, and H/D atoms are
        skipped in the per-atom outlier scan.
      collect_outliers: when True, fill self.zero_occ, self.bad_adps,
        self.partial_occ and self.different_occ and count them in
        self.n_outliers.
    """
    for name in self.__slots__ :
      setattr(self, name, None)
    validation.__init__(self)
    assert len(xray_structure.scatterers()) != 0
    from cctbx import adptbx
    from scitbx.array_family import flex
    self.n_total = xray_structure.scatterers().size() # always include H/D
    self.results = None
    pdb_atoms = pdb_hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    hd_selection = xray_structure.hd_selection()
    self.n_all = hd_selection.size()
    self.n_hd = hd_selection.count(True)

    # Summary statistics drop H/D only when the structure is a genuine mix;
    # an all-H/D or H/D-free structure is used as is.
    xrs = xray_structure
    if (ignore_hd) and (0 < self.n_hd < self.n_all) :
      xrs = xray_structure.select(~hd_selection)
    stat_u_isos = xrs.extract_u_iso_or_u_equiv()
    stat_occ = xrs.scatterers().extract_occupancies()
    self.n_atoms = xrs.scatterers().size()
    self.n_non_hd = self.n_all - self.n_hd
    self.n_aniso = xrs.use_u_aniso().count(True)
    self.n_aniso_h = (xray_structure.use_u_aniso() & hd_selection).count(True)
    self.n_npd = xrs.is_positive_definite_u().count(False)
    self.n_zero_b = (stat_u_isos == 0).count(True)
    self.n_zero_occ = (stat_occ == 0).count(True)

    # Outlier window: mean +/- 4 sigma over the non-zero displacements.
    # Without at least two such values there is no upper bound.
    u_cutoff_high = float("inf")
    u_cutoff_low = 0
    u_non_zero = stat_u_isos.select(stat_u_isos > 0)
    if (len(u_non_zero) > 1) :
      mv = flex.mean_and_variance(u_non_zero)
      sigma = mv.unweighted_sample_standard_deviation()
      u_cutoff_high = mv.mean() + (4.0 * sigma)
      u_cutoff_low = mv.mean() - (4.0 * sigma)
    self.b_mean = adptbx.u_as_b(flex.mean(stat_u_isos))
    self.b_min = adptbx.u_as_b(flex.min(stat_u_isos))
    self.b_max = adptbx.u_as_b(flex.max(stat_u_isos))
    self.o_mean = flex.mean(stat_occ)
    self.o_min = flex.min(stat_occ)
    self.o_max = flex.max(stat_occ)
    self.n_outliers = self.n_aniso_h + self.n_npd
    self.zero_occ = []
    self.partial_occ = []
    self.different_occ = []
    self.bad_adps = []
    self.b_histogram = None # TODO

    def is_u_iso_outlier (u) :
      return (u < u_cutoff_low) or (u > u_cutoff_high) or (u <= 0)

    if (not collect_outliers) :
      return

    # these statistics cover all atoms!
    occupancies = xray_structure.scatterers().extract_occupancies()
    u_isos = xray_structure.extract_u_iso_or_u_equiv()
    # atoms already reported as part of a residue-level record
    collected = flex.bool(occupancies.size(), False)
    for i_seq, occ in enumerate(occupancies) :
      if (hd_selection[i_seq] and ignore_hd) or collected[i_seq] :
        continue
      pdb_atom = pdb_atoms[i_seq]
      parent = pdb_atom.parent()
      if (occ <= 0) :
        group_atoms = parent.atoms()
        labels = pdb_atom.fetch_labels()
        if (len(group_atoms) > 1) and (group_atoms.extract_occ().all_eq(0)) :
          # the whole group is at zero occupancy: one residue-level record
          i_seqs = group_atoms.extract_i_seq()
          b_mean = adptbx.u_as_b(flex.mean(u_isos.select(i_seqs)))
          outlier = residue_occupancy(
            chain_id=labels.chain_id,
            resseq=labels.resseq,
            icode=labels.icode,
            altloc=labels.altloc,
            resname=labels.resname,
            occupancy=occ,
            outlier=True,
            xyz=group_atoms.extract_xyz().mean(),
            b_iso=b_mean)
          collected.set_selected(i_seqs, True)
        else :
          assert (pdb_atom.occ == occ), "%s: %s <--> %s" % (pdb_atom.id_str(),
            pdb_atom.occ, occ)
          outlier = atom_occupancy(
            pdb_atom=pdb_atom,
            occupancy=occ,
            b_iso=adptbx.u_as_b(u_isos[i_seq]),
            xyz=pdb_atom.xyz,
            outlier=True)
        self.zero_occ.append(outlier)
        self.n_outliers += 1
      elif is_u_iso_outlier(u_isos[i_seq]) :
        u_atom = u_isos[i_seq]
        residue_level = False
        if (u_atom > 0) :
          # if the average displacement for the entire residue falls outside
          # the cutoffs, save as a single residue outlier
          group_atoms = parent.atoms()
          i_seqs = group_atoms.extract_i_seq()
          u_mean = flex.mean(u_isos.select(i_seqs))
          residue_level = is_u_iso_outlier(u_mean)
        if residue_level :
          labels = pdb_atom.fetch_labels()
          outlier = residue_bfactor(
            chain_id=labels.chain_id,
            resseq=labels.resseq,
            icode=labels.icode,
            altloc=labels.altloc,
            resname=labels.resname,
            occupancy=occ,
            outlier=True,
            xyz=group_atoms.extract_xyz().mean(),
            b_iso=adptbx.u_as_b(u_mean))
          collected.set_selected(i_seqs, True)
        else :
          # zero displacements are always recorded on a per-atom basis, as
          # are atoms whose residue average is within the cutoffs
          outlier = atom_bfactor(
            pdb_atom=pdb_atom,
            occupancy=occ,
            b_iso=adptbx.u_as_b(u_atom),
            xyz=pdb_atom.xyz,
            outlier=True)
        self.bad_adps.append(outlier)
        self.n_outliers += 1

    # analyze occupancies for first model
    model = pdb_hierarchy.models()[0]
    for chain in model.chains() :
      for residue_group in chain.residue_groups() :
        # get unique set of atom names
        atom_names = set()
        for atom in residue_group.atoms() :
          atom_names.add(atom.name.strip())

        # check total occupancy for each atom
        for name in atom_names :
          occupancy = 0.0
          atoms = list()
          for atom_group in residue_group.atom_groups() :
            atom = atom_group.get_atom(name)
            if (atom is not None) :
              occupancy += atom.occ
              atoms.append(atom)

          if (not approx_equal(occupancy, 1.0, out=None, eps=1.0e-3)) :
            for atom in atoms :
              outlier = atom_occupancy(
                pdb_atom=atom,
                occupancy=atom.occ,
                b_iso=adptbx.u_as_b(atom.b),
                xyz=atom.xyz,
                outlier=True)
              self.partial_occ.append(outlier)
              self.n_outliers += 1

        # check that atoms in an atom group have the same occupancy; only
        # the first inconsistent atom group of a residue group is reported
        for atom_group in residue_group.atom_groups() :
          residue_is_okay = True
          group_atoms = atom_group.atoms()
          base_occupancy = group_atoms[0].occ
          for atom in group_atoms :
            if (not approx_equal(base_occupancy, atom.occ,
                                 out=None, eps=1.0e-3)) :
              labels = atom.fetch_labels()
              i_seqs = group_atoms.extract_i_seq()
              b_mean = adptbx.u_as_b(flex.mean(u_isos.select(i_seqs)))
              # Report the group's own mean occupancy.  This used to pass
              # the leftover loop variable from the per-atom scan above,
              # i.e. the occupancy of the last scatterer in the structure.
              outlier = residue_occupancy(
                chain_id=labels.chain_id,
                resseq=labels.resseq,
                icode=labels.icode,
                altloc=labels.altloc,
                resname=labels.resname,
                occupancy=flex.mean(group_atoms.extract_occ()),
                outlier=True,
                xyz=group_atoms.extract_xyz().mean(),
                b_iso=b_mean)
              self.different_occ.append(outlier)
              self.n_outliers += 1
              residue_is_okay = False
              break
          if (not residue_is_okay) :
            break
Ejemplo n.º 24
0
   def __init__(self,
                pdb_hierarchy,
                outliers_only=False,
                out=sys.stdout,
                collect_ideal=False,
                apply_phi_psi_correction=False,
                display_phi_psi_correction=False,
                quiet=False):
       validation.__init__(self)
       self._outlier_i_seqs = flex.size_t()
       self.beta_ideal = {}
       output_list = []
       self.stats = group_args(n_results=0,
                               n_weighted_results=0,
                               n_weighted_outliers=0)
       if apply_phi_psi_correction:
           phi_psi_angles = get_phi_psi_dict(pdb_hierarchy)
           new_outliers = 0
           outliers_removed = 0
           total_residues = 0
       from mmtbx.validation import utils
       use_segids = utils.use_segids_in_place_of_chainids(
           hierarchy=pdb_hierarchy)
       for model in pdb_hierarchy.models():
           for chain in model.chains():
               if use_segids:
                   chain_id = utils.get_segid_as_chainid(chain=chain)
               else:
                   chain_id = chain.id
               for rg in chain.residue_groups():
                   for i_cf, cf in enumerate(rg.conformers()):
                       for i_residue, residue in enumerate(cf.residues()):
                           if (residue.resname == "GLY"):
                               continue
                           is_first = (i_cf == 0)
                           is_alt_conf = False
                           relevant_atoms = {}
                           for atom in residue.atoms():
                               if (atom.name in relevant_atom_names):
                                   relevant_atoms[atom.name] = atom
                                   if (len(atom.parent().altloc) != 0):
                                       is_alt_conf = True
                           if ((is_first or is_alt_conf)
                                   and len(relevant_atoms) == 4):
                               result = calculate_ideal_and_deviation(
                                   relevant_atoms=relevant_atoms,
                                   resname=residue.resname)
                               dev = result.deviation
                               dihedralNABB = result.dihedral
                               betaxyz = result.ideal
                               if (dev is None): continue
                               resCB = relevant_atoms[" CB "]
                               self.stats.n_results += 1
                               self.stats.n_weighted_results += resCB.occ
                               if (is_alt_conf):
                                   altchar = cf.altloc
                               else:
                                   altchar = " "
                               if apply_phi_psi_correction:
                                   total_residues += 1
                                   id_str = '|%s:%s|' % (residue.id_str(),
                                                         altchar)
                                   phi_psi = phi_psi_angles.get(id_str, None)
                                   if phi_psi:
                                       rc = cbd_utils.get_phi_psi_correction(
                                           result,
                                           residue,
                                           phi_psi,
                                           display_phi_psi_correction=
                                           display_phi_psi_correction,
                                       )
                                       if rc:
                                           dev, dihedralNABB, start, finish = rc
                                           if start and not finish:
                                               outliers_removed += 1
                                           elif not start and finish:
                                               new_outliers += 1
                               if (dev >= 0.25 or outliers_only == False):
                                   if (dev >= 0.25):
                                       self.n_outliers += 1
                                       self.stats.n_weighted_outliers += resCB.occ
                                       self._outlier_i_seqs.append(atom.i_seq)
                                   res = residue.resname.lower()
                                   sub = chain.id
                                   if (len(sub) == 1):
                                       sub = " " + sub
                                   result = cbeta(chain_id=chain_id,
                                                  resname=residue.resname,
                                                  resseq=residue.resseq,
                                                  icode=residue.icode,
                                                  altloc=altchar,
                                                  xyz=resCB.xyz,
                                                  occupancy=resCB.occ,
                                                  deviation=dev,
                                                  dihedral_NABB=dihedralNABB,
                                                  ideal_xyz=betaxyz,
                                                  outlier=(dev >= 0.25))
                                   self.results.append(result)
                                   key = result.id_str()
                                   if (collect_ideal):
                                       self.beta_ideal[key] = betaxyz
           if apply_phi_psi_correction:
               print('''
 Outliers removed : %5d
 New outliers     : %5d
 Num. of outliers : %5d
 Num. of residues : %5d
 ''' % (
                   outliers_removed,
                   new_outliers,
                   self.n_outliers,
                   total_residues,
               ))
Ejemplo n.º 25
0
 def __init__(self,
              pdb_hierarchy,
              keep_hydrogens=True,
              nuclear=False,
              force_unique_chain_ids=False,
              time_limit=120,
              b_factor_cutoff=None,
              save_modified_hierarchy=False,
              verbose=False,
              do_flips=False,
              out=sys.stdout):
     """Compute clashscores for every model in ``pdb_hierarchy``.

     Each model is passed through ``check_and_add_hydrogen`` and then
     scored with ``probe_clashscore_manager``.  Per-model values are stored
     in ``self.clash_dict``, ``self.clash_dict_b_cutoff`` and
     ``self.list_dict`` keyed by ``model.id``.  ``self.results``,
     ``self.n_outliers``, ``self.clashscore`` and
     ``self.clashscore_b_cutoff`` are updated for each model for as long as
     ``self.clashscore`` is still None (and always for a single model).

     Parameters:
       pdb_hierarchy: hierarchy whose models are scored.
       keep_hydrogens, nuclear, time_limit, do_flips, verbose: forwarded to
         ``check_and_add_hydrogen``; ``nuclear`` and ``verbose`` also go to
         ``probe_clashscore_manager``.
       force_unique_chain_ids: not read by this constructor.
       b_factor_cutoff: stored on ``self`` and forwarded to
         ``probe_clashscore_manager``.
       save_modified_hierarchy: when true, ``self.pdb_hierarchy`` is rebuilt
         from the manager's ``h_pdb_string``.
       out: passed as ``log`` to ``check_and_add_hydrogen``.

     Raises:
       RuntimeError: if the "probe" module is not available.
     """
     validation.__init__(self)
     self.b_factor_cutoff = b_factor_cutoff
     self.clashscore = None
     self.clashscore_b_cutoff = None
     self.clash_dict = {}
     self.clash_dict_b_cutoff = {}
     self.list_dict = {}
     self.probe_file = None
     if (not libtbx.env.has_module(name="probe")):
         raise RuntimeError(
             "Probe could not be detected on your system.  Please make sure "
             + "Probe is in your path.\nProbe is available at " +
             "http://kinemage.biochem.duke.edu/")
     if verbose:
         # Single-argument call form: valid as a Python 2 print statement
         # and as a Python 3 function call.
         if not nuclear:
             print("\nUsing electron cloud x-H distances and vdW radii")
         else:
             print("\nUsing nuclear cloud x-H distances and vdW radii")
     import iotbx.pdb.hierarchy
     from scitbx.array_family import flex
     from mmtbx.validation import utils
     n_models = len(pdb_hierarchy.models())
     use_segids = utils.use_segids_in_place_of_chainids(
         hierarchy=pdb_hierarchy)
     for i_mod, model in enumerate(pdb_hierarchy.models()):
         input_str, _ = check_and_add_hydrogen(
             pdb_hierarchy=pdb_hierarchy,
             model_number=i_mod,
             nuclear=nuclear,
             verbose=verbose,
             time_limit=time_limit,
             keep_hydrogens=keep_hydrogens,
             do_flips=do_flips,
             log=out)
         # Largest occupancy is taken from a detached copy of this model
         # placed in its own root.
         single_model_root = iotbx.pdb.hierarchy.root()
         single_model_root.append_model(model.detached_copy())
         occ_max = flex.max(single_model_root.atoms().extract_occ())
         manager = probe_clashscore_manager(
             h_pdb_string=input_str,
             nuclear=nuclear,
             largest_occupancy=occ_max,
             b_factor_cutoff=b_factor_cutoff,
             use_segids=use_segids,
             verbose=verbose)
         self.probe_clashscore_manager = manager
         if (save_modified_hierarchy):
             self.pdb_hierarchy = iotbx.pdb.hierarchy.input(
                 pdb_string=manager.h_pdb_string).hierarchy
         self.clash_dict[model.id] = manager.clashscore
         self.clash_dict_b_cutoff[model.id] = manager.clashscore_b_cutoff
         self.list_dict[model.id] = manager.bad_clashes
         if (n_models == 1) or (self.clashscore is None):
             self.results = manager.bad_clashes
             self.n_outliers = len(self.results)
             self.clashscore = manager.clashscore
             self.clashscore_b_cutoff = manager.clashscore_b_cutoff
Ejemplo n.º 26
0
  def __init__ (self,
      pdb_hierarchy,
      outliers_only=False,
      show_errors=False,
      out=sys.stdout,
      quiet=False) :
    """
    Evaluate phi/psi angles for every residue triplet in pdb_hierarchy.

    Each triplet from generate_protein_threes is classified (general,
    glycine, cis/trans proline, pre-proline, Ile/Val), scored with
    RamachandranEval and turned into a ramachandran result.  Counts are
    updated once per residue; results are stored sorted by id_str().

    Parameters:
      pdb_hierarchy: hierarchy to analyse; atom i_seq values are reset when
        they are all zero.
      outliers_only: when true, only outliers are appended to self.results.
      show_errors, out, quiet: not read by this constructor.
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [ 0 ] * 6
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if (all_i_seqs.all_eq(0)) :
      pdb_atoms.reset_i_seq()
    # NOTE(review): computed but never applied; chain IDs below always come
    # from the chain object - confirm whether segids should be honoured.
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)
    # Sets rather than lists: these are only used for membership tests.
    count_keys = set()
    uniqueness_keys = set()
    rama_eval = ramachandran_eval.RamachandranEval()
    for three in generate_protein_threes(hierarchy=pdb_hierarchy, geometry=None):
      main_residue = three[1]
      phi_psi_atoms = three.get_phi_psi_atoms()
      if phi_psi_atoms is None:
        continue
      phi_atoms, psi_atoms = phi_psi_atoms
      phi = get_dihedral(phi_atoms)
      psi = get_dihedral(psi_atoms)
      coords = get_center(main_residue) #should find the CA of the center residue
      if (phi is None) or (psi is None) :
        continue

      main_resname = main_residue.resname[0:3]
      res_type = RAMA_GENERAL
      if (main_resname == "GLY") :
        res_type = RAMA_GLYCINE
      elif (main_resname == "PRO") :
        if is_cis_peptide(three) :
          res_type = RAMA_CISPRO
        else :
          res_type = RAMA_TRANSPRO
      elif (three[2].resname == "PRO") :
        res_type = RAMA_PREPRO
      elif (main_resname == "ILE") or (main_resname == "VAL") :
        res_type = RAMA_ILE_VAL
      value = rama_eval.evaluate(res_types[res_type], [phi, psi])
      ramaType = self.evaluateScore(res_type, value)
      is_outlier = ramaType == RAMALYZE_OUTLIER

      # XXX only save kinemage data for outliers
      markup = None
      if is_outlier :
        c_alphas = get_cas_from_three(three)
        assert (len(c_alphas) == 3)
        markup = self.as_markup_for_kinemage(c_alphas)
      result = ramachandran(
        chain_id=main_residue.parent().parent().id,
        resseq=main_residue.resseq,
        icode=main_residue.icode,
        resname=main_residue.resname,
        altloc=get_altloc_from_three(three),
        segid=None, # XXX ???
        phi=phi,
        psi=psi,
        rama_type=ramaType,
        res_type=res_type,
        score=value*100,
        outlier=is_outlier,
        xyz=coords,
        markup=markup)
      residue_key = result.chain_id+result.resseq+result.icode
      # count each residue once, using only the blank or 'A' conformer
      if result.altloc in ('', 'A') and residue_key not in count_keys:
        self.n_total += 1
        self.n_type[res_type] += 1
        self.add_to_validation_counts(ramaType)
        count_keys.add(residue_key)
      if (not outliers_only or is_outlier) :
        if (result.altloc != '' or residue_key not in uniqueness_keys) :
          #the threes/conformers method results in some redundant result
          #  calculations in structures with alternates. Using the
          #  uniqueness_keys set prevents redundant results being added to
          #  the final list
          self.results.append(result)
          uniqueness_keys.add(residue_key)
      if is_outlier :
        i_seqs = main_residue.atoms().extract_i_seq()
        assert (not i_seqs.all_eq(0))
        self._outlier_i_seqs.extend(i_seqs)
    self.results.sort(key=lambda res: res.id_str())
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0
Ejemplo n.º 27
0
    def __init__(self,
                 pdb_hierarchy,
                 xray_structure,
                 fmodel,
                 distance_cutoff=4.0,
                 collect_all=True,
                 molprobity_map_params=None):
        validation.__init__(self)
        from mmtbx.real_space_correlation import extract_map_stats_for_single_atoms
        from cctbx import adptbx
        from scitbx.matrix import col
        self.n_bad = 0
        self.n_heavy = 0
        pdb_atoms = pdb_hierarchy.atoms()
        if (len(pdb_atoms) > 1):
            assert (not pdb_atoms.extract_i_seq().all_eq(0))
        unit_cell = xray_structure.unit_cell()
        pair_asu_table = xray_structure.pair_asu_table(
            distance_cutoff=distance_cutoff)
        asu_mappings = pair_asu_table.asu_mappings()
        asu_table = pair_asu_table.table()
        u_isos = xray_structure.extract_u_iso_or_u_equiv()
        occupancies = xray_structure.scatterers().extract_occupancies()
        sites_frac = xray_structure.sites_frac()
        sel_cache = pdb_hierarchy.atom_selection_cache()
        water_sel = sel_cache.selection("water")

        if (molprobity_map_params is not None):
            # assume parameters have been validated (symmetry of pdb and map matches)
            two_fofc_map = None
            fc_map = None
            d_min = None
            crystal_gridding = None

            # read two_fofc_map
            if (molprobity_map_params.map_file_name is not None):
                f = any_file(molprobity_map_params.map_file_name)
                two_fofc_map = f.file_object.map_data()
                d_min = molprobity_map_params.d_min
                crystal_gridding = maptbx.crystal_gridding(
                    f.file_object.unit_cell(),
                    space_group_info=space_group_info(
                        f.file_object.space_group_number),
                    pre_determined_n_real=f.file_object.unit_cell_grid)

                pdb_atoms = pdb_hierarchy.atoms()
                xray_structure = pdb_hierarchy.extract_xray_structure(
                    crystal_symmetry=f.crystal_symmetry())
                unit_cell = xray_structure.unit_cell()
                # check for origin shift
                # ---------------------------------------------------------------------
                soin = maptbx.shift_origin_if_needed(
                    map_data=two_fofc_map,
                    sites_cart=xray_structure.sites_cart(),
                    crystal_symmetry=xray_structure.crystal_symmetry())
                two_fofc_map = soin.map_data
                xray_structure.set_sites_cart(soin.sites_cart)
                # ---------------------------------------------------------------------
                pair_asu_table = xray_structure.pair_asu_table(
                    distance_cutoff=distance_cutoff)
                asu_mappings = pair_asu_table.asu_mappings()
                asu_table = pair_asu_table.table()
                u_isos = xray_structure.extract_u_iso_or_u_equiv()
                occupancies = xray_structure.scatterers().extract_occupancies()
                sites_frac = xray_structure.sites_frac()
                sel_cache = pdb_hierarchy.atom_selection_cache()
                water_sel = sel_cache.selection("water")

            elif (molprobity_map_params.map_coefficients_file_name
                  is not None):
                f = any_file(molprobity_map_params.map_coefficients_file_name)
                fourier_coefficients = f.file_server.get_miller_array(
                    molprobity_map_params.map_coefficients_label)
                crystal_symmetry = fourier_coefficients.crystal_symmetry()
                d_min = fourier_coefficients.d_min()
                crystal_gridding = maptbx.crystal_gridding(
                    crystal_symmetry.unit_cell(),
                    d_min,
                    resolution_factor=0.25,
                    space_group_info=crystal_symmetry.space_group_info())
                two_fofc_map = miller.fft_map(
                  crystal_gridding=crystal_gridding,
                  fourier_coefficients=fourier_coefficients).apply_sigma_scaling().\
                  real_map_unpadded()

            # calculate fc_map
            assert ((d_min is not None) and (crystal_gridding is not None))
            f_calc = xray_structure.structure_factors(d_min=d_min).f_calc()
            fc_map = miller.fft_map(crystal_gridding=crystal_gridding,
                                    fourier_coefficients=f_calc)
            fc_map = fc_map.apply_sigma_scaling().real_map_unpadded()

            map_stats = extract_map_stats_for_single_atoms(
                pdb_atoms=pdb_atoms,
                xray_structure=xray_structure,
                fmodel=None,
                selection=water_sel,
                fc_map=fc_map,
                two_fofc_map=two_fofc_map)
        else:
            map_stats = extract_map_stats_for_single_atoms(
                pdb_atoms=pdb_atoms,
                xray_structure=xray_structure,
                fmodel=fmodel,
                selection=water_sel)
        waters = []
        for i_seq, atom in enumerate(pdb_atoms):
            if (water_sel[i_seq]):
                rt_mx_i_inv = asu_mappings.get_rt_mx(i_seq, 0).inverse()
                self.n_total += 1
                asu_dict = asu_table[i_seq]
                nearest_atom = nearest_contact = None
                for j_seq, j_sym_groups in asu_dict.items():
                    atom_j = pdb_atoms[j_seq]
                    site_j = sites_frac[j_seq]
                    # Filter out hydrogens
                    if atom_j.element.upper().strip() in ["H", "D"]:
                        continue
                    for j_sym_group in j_sym_groups:
                        rt_mx = rt_mx_i_inv.multiply(
                            asu_mappings.get_rt_mx(j_seq, j_sym_group[0]))
                        site_ji = rt_mx * site_j
                        site_ji_cart = xray_structure.unit_cell(
                        ).orthogonalize(site_ji)
                        vec_i = col(atom.xyz)
                        vec_ji = col(site_ji_cart)
                        dxyz = abs(vec_i - vec_ji)
                        if (nearest_contact is None) or (dxyz <
                                                         nearest_contact):
                            nearest_contact = dxyz
                            nearest_atom = atom_info(pdb_atom=atom_j,
                                                     symop=rt_mx)
                w = water(pdb_atom=atom,
                          b_iso=adptbx.u_as_b(u_isos[i_seq]),
                          occupancy=occupancies[i_seq],
                          nearest_contact=nearest_contact,
                          nearest_atom=nearest_atom,
                          score=map_stats.two_fofc_ccs[i_seq],
                          fmodel=map_stats.fmodel_values[i_seq],
                          two_fofc=map_stats.two_fofc_values[i_seq],
                          fofc=map_stats.fofc_values[i_seq],
                          anom=map_stats.anom_values[i_seq],
                          n_hbonds=None)  # TODO
                if (w.is_bad_water()):
                    w.outlier = True
                    self.n_bad += 1
                elif (w.is_heavy_atom()):
                    w.outlier = True
                    self.n_heavy += 1
                if (w.outlier) or (collect_all):
                    self.results.append(w)
        self.n_outliers = len(self.results)
Ejemplo n.º 28
0
    def __init__(self,
                 pdb_hierarchy,
                 nontrans_only=False,
                 out=sys.stdout,
                 quiet=True):
        """Classify the omega dihedral of each two-residue fragment.

        Walks the fragments yielded by generate_protein_fragments(length=2),
        computes omega for each one, updates the per-class counters and
        stores an omega_result for every accepted fragment in self.results,
        which ends up sorted by model id and id string.

        Parameters:
          pdb_hierarchy: hierarchy to analyse; the atom i_seqs are reset
            first if they are all zero.
          nontrans_only: when true, only non-trans results are stored in
            self.results (the counters still cover every measured omega).
          out: not read by this constructor.
          quiet: not read by this constructor.
        """
        validation.__init__(self)
        # Indexed by [OMEGA_GENERAL, OMEGA_PRO].
        self.residue_count = [0, 0]
        # Outer index: [OMEGA_GENERAL, OMEGA_PRO]; inner index:
        # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED].
        self.omega_count = [[0, 0, 0], [0, 0, 0]]

        from mmtbx.validation import utils
        from scitbx.array_family import flex
        self._outlier_i_seqs = flex.size_t()
        pdb_atoms = pdb_hierarchy.atoms()
        if pdb_atoms.extract_i_seq().all_eq(0):
            pdb_atoms.reset_i_seq()
        use_segids = utils.use_segids_in_place_of_chainids(
            hierarchy=pdb_hierarchy)

        first_conf_altloc = None
        prev_chain_id = None
        for twores in generate_protein_fragments(
                pdb_hierarchy,
                length=2,
                geometry=None,
                include_non_standard_peptides=True):
            prev_residue = twores[0]
            # The second residue is the relevant one for id-ing cis-Pro.
            main_residue = twores[1]
            conf_altloc = get_conformer_altloc(twores)
            prevres_altloc, mainres_altloc = get_local_omega_altlocs(twores)
            # The default '' evaluates False, so this is '' only when neither
            # residue carries a local altloc.
            twores_altloc = prevres_altloc or mainres_altloc

            chain = main_residue.parent().parent()
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id

            if chain_id != prev_chain_id:
                # New chain: its first conformer becomes the reference altloc.
                first_conf_altloc = conf_altloc
                prev_chain_id = chain_id
            if (conf_altloc != first_conf_altloc) and twores_altloc == '':
                # Non-alternate residues are only reported for the reference
                # conformer of the chain.
                continue

            # omega_atoms is the list [CA1 C1 N2 CA2], with None for any
            # missing atom.
            omega_atoms = get_omega_atoms(twores)
            if None in omega_atoms:
                continue
            omega = get_omega(omega_atoms)
            if omega is None:
                continue

            omega_type = find_omega_type(omega)
            is_nontrans = (omega_type != OMEGALYZE_TRANS)
            if is_nontrans:
                self.n_outliers += 1
            res_type = (OMEGA_PRO if main_residue.resname == "PRO"
                        else OMEGA_GENERAL)
            self.residue_count[res_type] += 1
            self.omega_count[res_type][omega_type] += 1

            markup_atoms = [
                kin_atom(omega_atom.parent().id_str(), omega_atom.xyz)
                for omega_atom in omega_atoms]

            result = omega_result(
                model_id=prev_residue.parent().parent().parent().id,
                chain_id=chain_id,
                resseq=main_residue.resseq,
                icode=main_residue.icode,
                resname=main_residue.resname,
                altloc=mainres_altloc,
                prev_resseq=prev_residue.resseq,
                prev_icode=prev_residue.icode,
                prev_resname=prev_residue.resname,
                prev_altloc=prevres_altloc,
                segid=None,
                omega=omega,
                omega_type=omega_type,
                res_type=res_type,
                is_nontrans=is_nontrans,
                outlier=is_nontrans,
                highest_mc_b=get_highest_mc_b(prev_residue.atoms(),
                                              main_residue.atoms()),
                xyz=get_center(main_residue),
                markup_atoms=markup_atoms)

            if is_nontrans or not nontrans_only:
                self.results.append(result)
            if is_nontrans:
                i_seqs = main_residue.atoms().extract_i_seq()
                # Same sanity check as ramalyze: i_seqs must be assigned.
                assert (not i_seqs.all_eq(0))
                self._outlier_i_seqs.extend(i_seqs)

        # Sort once after the loop. list.sort is stable, so this gives the
        # same final order as re-sorting after every fragment, without the
        # repeated work.
        self.results.sort(key=lambda x: x.model_id + ':' + x.id_str())
Ejemplo n.º 29
0
  def __init__ (self, pdb_hierarchy, xray_structure, fmodel,
                distance_cutoff=4.0, collect_all=True,
                molprobity_map_params=None) :
    """Score every water oxygen against the density and its neighbours.

    For each atom selected by "resname HOH and name O" this finds the
    nearest non-hydrogen contact (symmetry mates included) within the pair
    table built with distance_cutoff, builds a water result from the
    per-atom map statistics, and flags it as an outlier when it is a bad
    water or a probable heavy atom.

    Parameters:
      pdb_hierarchy: source of the atoms and of the water selection.
      xray_structure: source of sites, B-factors, occupancies and the
        pair/asu tables.
      fmodel: passed to extract_map_stats_for_single_atoms when
        molprobity_map_params is None; otherwise not used.
      distance_cutoff: cutoff handed to xray_structure.pair_asu_table.
      collect_all: when true every water is stored in self.results, not
        only the flagged ones.
      molprobity_map_params: optional; when given, the 2mFo-DFc map is read
        from map_file_name or computed from the map coefficients, and an Fc
        map is computed from xray_structure on the same grid.
    """
    validation.__init__(self)
    from mmtbx.real_space_correlation import extract_map_stats_for_single_atoms
    from cctbx import adptbx
    from scitbx.matrix import col
    self.n_bad = 0
    self.n_heavy = 0
    pdb_atoms = pdb_hierarchy.atoms()
    if(len(pdb_atoms)>1):
      assert (not pdb_atoms.extract_i_seq().all_eq(0))
    # Fetched once here and reused for every orthogonalization below.
    unit_cell = xray_structure.unit_cell()
    pair_asu_table = xray_structure.pair_asu_table(
      distance_cutoff = distance_cutoff)
    asu_mappings = pair_asu_table.asu_mappings()
    asu_table = pair_asu_table.table()
    u_isos = xray_structure.extract_u_iso_or_u_equiv()
    occupancies = xray_structure.scatterers().extract_occupancies()
    sites_frac = xray_structure.sites_frac()
    sel_cache = pdb_hierarchy.atom_selection_cache()
    water_sel = sel_cache.selection("resname HOH and name O")

    if (molprobity_map_params is not None):
      # assume parameters have been validated (symmetry of pdb and map matches)
      two_fofc_map = None
      d_min = None
      crystal_gridding = None

      # read two_fofc_map
      if (molprobity_map_params.map_file_name is not None):
        f = any_file(molprobity_map_params.map_file_name)
        two_fofc_map = f.file_object.map_data()
        d_min = molprobity_map_params.d_min
        crystal_gridding = maptbx.crystal_gridding(
          f.file_object.unit_cell(),
          space_group_info=space_group_info(f.file_object.space_group_number),
          pre_determined_n_real=f.file_object.unit_cell_grid)
      elif (molprobity_map_params.map_coefficients_file_name is not None):
        f = any_file(molprobity_map_params.map_coefficients_file_name)
        fourier_coefficients = f.file_server.get_miller_array(
          molprobity_map_params.map_coefficients_label)
        crystal_symmetry = fourier_coefficients.crystal_symmetry()
        d_min = fourier_coefficients.d_min()
        crystal_gridding = maptbx.crystal_gridding(
          crystal_symmetry.unit_cell(), d_min, resolution_factor=0.25,
          space_group_info=crystal_symmetry.space_group_info())
        two_fofc_map = miller.fft_map(
          crystal_gridding=crystal_gridding,
          fourier_coefficients=fourier_coefficients).apply_sigma_scaling().\
          real_map_unpadded()

      # calculate fc_map on the same grid as the 2mFo-DFc map; this fails
      # if neither a map file nor map coefficients supplied d_min/gridding.
      assert( (d_min is not None) and (crystal_gridding is not None) )
      f_calc = xray_structure.structure_factors(d_min=d_min).f_calc()
      fc_map = miller.fft_map(crystal_gridding=crystal_gridding,
                              fourier_coefficients=f_calc)
      fc_map = fc_map.apply_sigma_scaling().real_map_unpadded()

      map_stats = extract_map_stats_for_single_atoms(
        pdb_atoms=pdb_atoms,
        xray_structure=xray_structure,
        fmodel=None,
        selection=water_sel,
        fc_map=fc_map,
        two_fofc_map=two_fofc_map)
    else:
      map_stats = extract_map_stats_for_single_atoms(
        pdb_atoms=pdb_atoms,
        xray_structure=xray_structure,
        fmodel=fmodel,
        selection=water_sel)

    for i_seq, atom in enumerate(pdb_atoms) :
      if (not water_sel[i_seq]) :
        continue
      rt_mx_i_inv = asu_mappings.get_rt_mx(i_seq, 0).inverse()
      self.n_total += 1
      # The water's own position does not change while scanning neighbours.
      vec_i = col(atom.xyz)
      nearest_atom = nearest_contact = None
      for j_seq, j_sym_groups in asu_table[i_seq].items() :
        atom_j = pdb_atoms[j_seq]
        # Filter out hydrogens
        if atom_j.element.upper().strip() in ["H", "D"]:
          continue
        site_j = sites_frac[j_seq]
        for j_sym_group in j_sym_groups:
          # Symmetry operator that places atom j relative to this water.
          rt_mx = rt_mx_i_inv.multiply(asu_mappings.get_rt_mx(j_seq,
            j_sym_group[0]))
          site_ji_cart = unit_cell.orthogonalize(rt_mx * site_j)
          dxyz = abs(vec_i - col(site_ji_cart))
          if (nearest_contact is None) or (dxyz < nearest_contact) :
            nearest_contact = dxyz
            nearest_atom = atom_info(pdb_atom=atom_j, symop=rt_mx)
      w = water(
        pdb_atom=atom,
        b_iso=adptbx.u_as_b(u_isos[i_seq]),
        occupancy=occupancies[i_seq],
        nearest_contact=nearest_contact,
        nearest_atom=nearest_atom,
        score=map_stats.two_fofc_ccs[i_seq],
        fmodel=map_stats.fmodel_values[i_seq],
        two_fofc=map_stats.two_fofc_values[i_seq],
        fofc=map_stats.fofc_values[i_seq],
        anom=map_stats.anom_values[i_seq],
        n_hbonds=None) # TODO
      if (w.is_bad_water()) :
        w.outlier = True
        self.n_bad += 1
      elif (w.is_heavy_atom()) :
        w.outlier = True
        self.n_heavy += 1
      if (w.outlier) or (collect_all) :
        self.results.append(w)
    # NOTE(review): with collect_all=True self.results holds every water, so
    # this counts all of them rather than only the flagged ones. Kept as-is
    # for compatibility; confirm whether n_bad + n_heavy was intended.
    self.n_outliers = len(self.results)
Ejemplo n.º 30
0
 def __init__ (self,
     pdb_hierarchy,
     outliers_only=False,
     show_errors=False,
     out=sys.stdout,
     quiet=False) :
   """
   Evaluate backbone phi/psi for the atom groups of pdb_hierarchy.

   For every atom group whose phi and psi can both be computed, a residue
   class is chosen, the (phi, psi) pair is evaluated with RamachandranEval,
   and a ramachandran result is built.

   Parameters:
     pdb_hierarchy: hierarchy to analyse; atom i_seqs are reset first if
       they are all zero.
     outliers_only: when true, only outlier results are appended to
       self.results (n_total and n_type still count every evaluated group).
     show_errors, out, quiet: not read in the code visible here.

   Attributes set here: n_allowed, n_favored (initialised to 0), n_type
   (six counters indexed by res_type), _outlier_i_seqs, out_percent and
   fav_percent.
   """
   validation.__init__(self)
   # NOTE(review): n_allowed/n_favored are only initialised in this method;
   # presumably self.evaluateScore() increments them -- confirm.
   self.n_allowed = 0
   self.n_favored = 0
   # One counter per residue class, indexed by the RAMA_* res_type value.
   self.n_type = [ 0 ] * 6
   from mmtbx.validation import utils
   import mmtbx.rotamer
   from mmtbx.rotamer import ramachandran_eval
   from scitbx.array_family import flex
   # Collects the atom i_seqs of every outlier atom group.
   self._outlier_i_seqs = flex.size_t()
   pdb_atoms = pdb_hierarchy.atoms()
   all_i_seqs = pdb_atoms.extract_i_seq()
   # All-zero i_seqs would trip the assert in the outlier branch below, so
   # have the atoms renumbered first.
   if (all_i_seqs.all_eq(0)) :
     pdb_atoms.reset_i_seq()
   use_segids = utils.use_segids_in_place_of_chainids(
     hierarchy=pdb_hierarchy)
   # analysis, output_list, prev_resid and next_resseq are not read again in
   # the code visible here.
   analysis = ""
   output_list = []
   r = ramachandran_eval.RamachandranEval()
   prev_rezes, next_rezes = None, None
   prev_resid = None
   cur_resseq = None
   next_resseq = None
   for model in pdb_hierarchy.models():
     for chain in model.chains():
       if use_segids:
         chain_id = utils.get_segid_as_chainid(chain=chain)
       else:
         chain_id = chain.id
       residues = list(chain.residue_groups())
       for i, residue_group in enumerate(residues):
         # The reason I pass lists of atom_groups to get_phi and get_psi is to
         # deal with the particular issue where some residues have an A alt
         # conf that needs some atoms from a "" alt conf to get calculated
         # correctly.  See 1jxt.pdb for examples.  This way I can search both
         # the alt conf atoms and the "" atoms if necessary.
         prev_atom_list, next_atom_list, atom_list = None, None, None
         # cur_resseq is None only before the very first residue group, so
         # "rezes" is always bound when it is read here.  Note that
         # rezes/cur_resseq are not reset at chain or model boundaries, so the
         # first residue of a chain sees the last residue of the preceding one
         # as its "previous" residue.
         if cur_resseq is not None:
           prev_rezes = rezes
           prev_resseq = cur_resseq
         rezes = construct_complete_residues(residues[i])
         cur_resseq = residue_group.resseq_as_int()
         cur_icode = residue_group.icode.strip()
         if (i > 0):
           #check for insertion codes
           # Go on only if the preceding residue group has resseq one lower,
           # or the same resseq with an insertion code on at least one of the
           # two; otherwise skip this residue group.
           if (cur_resseq == residues[i-1].resseq_as_int()) :
             if (cur_icode == '') and (residues[i-1].icode.strip() == '') :
               continue
           elif (cur_resseq != (residues[i-1].resseq_as_int())+1):
             continue
         if (i < len(residues)-1):
           #find next residue
           # Same adjacency rule, applied to the following residue group.
           if residue_group.resseq_as_int() == \
              residues[i+1].resseq_as_int():
             if (cur_icode == '') and (residues[i+1].icode.strip() == '') :
               continue
           elif residue_group.resseq_as_int() != \
              (residues[i+1].resseq_as_int())-1:
             continue
           next_rezes = construct_complete_residues(residues[i+1])
           next_resid = residues[i+1].resseq_as_int()
         else:
           # Last residue group of the chain: there is no following residue.
           next_rezes = None
           next_resid = None
         for atom_group in residue_group.atom_groups():
           alt_conf = atom_group.altloc
           # rezes/prev_rezes/next_rezes are looked up by altloc.  When a
           # neighbour has no entry for this altloc, fall back to its first
           # key in sorted order.
           if rezes is not None:
             atom_list = rezes.get(alt_conf)
           if prev_rezes is not None:
             prev_atom_list = prev_rezes.get(alt_conf)
             if (prev_atom_list is None):
               prev_keys = sorted(prev_rezes.keys())
               prev_atom_list = prev_rezes.get(prev_keys[0])
           if next_rezes is not None:
             next_atom_list = next_rezes.get(alt_conf)
             if (next_atom_list is None):
               next_keys = sorted(next_rezes.keys())
               next_atom_list = next_rezes.get(next_keys[0])
           phi = get_phi(prev_atom_list, atom_list)
           psi = get_psi(atom_list, next_atom_list)
           coords = get_center(atom_group)
           # Only atom groups with both angles defined are counted and scored.
           if (phi is not None and psi is not None):
             # Choose the residue class; the first matching test wins:
             # GLY, PRO (cis or trans), pre-proline, ILE/VAL, else general.
             res_type = RAMA_GENERAL
             self.n_total += 1
             if (atom_group.resname[0:3] == "GLY"):
               res_type = RAMA_GLYCINE
             elif (atom_group.resname[0:3] == "PRO"):
               is_cis = is_cis_peptide(prev_atom_list, atom_list)
               if is_cis:
                 res_type = RAMA_CISPRO
               else:
                 res_type = RAMA_TRANSPRO
             elif (isPrePro(residues, i)):
               res_type = RAMA_PREPRO
             elif (atom_group.resname[0:3] == "ILE" or \
                   atom_group.resname[0:3] == "VAL"):
               res_type = RAMA_ILE_VAL
             self.n_type[res_type] += 1
             # res_types is a lookup defined outside this method; it maps the
             # res_type index to the key expected by RamachandranEval.
             value = r.evaluate(res_types[res_type], [phi, psi])
             ramaType = self.evaluateScore(res_type, value)
             is_outlier = ramaType == RAMALYZE_OUTLIER
             c_alphas = None
             # XXX only save kinemage data for outliers
             if is_outlier :
               c_alphas = []
               # Gather the CA atom of the previous, current and next residue.
               for atoms in [prev_atom_list, atom_list, next_atom_list] :
                 for a in atoms :
                   if (a.name.strip() == "CA") :
                     a_ = atom(pdb_atom=a)
                     c_alphas.append(c_alpha(
                       id_str=a_.atom_group_id_str(),
                       xyz=a_.xyz))
               # Exactly one CA is expected from each of the three residues.
               assert (len(c_alphas) == 3)
             result = ramachandran(
               chain_id=chain_id,
               resseq=residue_group.resseq,
               icode=residue_group.icode,
               resname=atom_group.resname,
               altloc=atom_group.altloc,
               segid=None, # XXX ???
               phi=phi,
               psi=psi,
               rama_type=ramaType,
               res_type=res_type,
               score=value*100,
               outlier=is_outlier,
               xyz=coords,
               c_alphas=c_alphas)
             if (not outliers_only or is_outlier) :
               self.results.append(result)
             if is_outlier :
               i_seqs = atom_group.atoms().extract_i_seq()
               # i_seqs must have been assigned (see the reset above).
               assert (not i_seqs.all_eq(0))
               self._outlier_i_seqs.extend(i_seqs)
   # Store the outlier and favored fractions as percentages; the counts
   # returned alongside them are not used here.
   out_count, out_percent = self.get_outliers_count_and_fraction()
   fav_count, fav_percent = self.get_favored_count_and_fraction()
   self.out_percent = out_percent * 100.0
   self.fav_percent = fav_percent * 100.0