def exercise () :
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/1ywf.pdb",
    test=os.path.isfile)
  if (pdb_file is None) :
    print "phenix_regression not available, skipping test."
    return
  from iotbx.command_line.pdb_add_conformations import run
  out = cStringIO.StringIO()
  run([pdb_file], out=out)
  assert contains_lines(out.getvalue(), "Modified model: 4254 atoms")
  out = cStringIO.StringIO()
  run([pdb_file, "atom_selection=\"chain A and not resname HOH\""], out=out)
  assert contains_lines(out.getvalue(), "Modified model: 3990 atoms")
  run([pdb_file, "new_occ=0.4", "atom_selection=\"resseq 1:275\""], out=out)
  from iotbx import file_reader
  pdb_in = file_reader.any_file("1ywf_split.pdb", force_type="pdb").file_object
  atoms = pdb_in.input.atoms()
  occ = atoms.extract_occ()
  assert (occ.count(0.6) == occ.count(0.4) == 1858)
  out = cStringIO.StringIO()
  run([pdb_file, "n_confs=3", "new_occ=0.25"], out=out)
  pdb_in = file_reader.any_file("1ywf_split.pdb", force_type="pdb").file_object
  assert contains_lines(out.getvalue(), """\
WARNING: zero-occupancy atom:
HETATM 1940  O  AHOH A 354      -0.009  56.525  -3.872  0.25 29.17           O\
""")
  atoms = pdb_in.input.atoms()
  assert (atoms.size() == 6381)
  occ = atoms.extract_occ()
  assert (occ.count(0.5) == 2126) and (occ.count(0.25) == 4254)
  try :
    run([pdb_file, "atom_selection=\"chain G\""], out=out)
  except Sorry, e :
    assert (str(e) == "Empty selection.")
def exercise_twin_detwin () :
  random.seed(12345)
  flex.set_random_seed(12345)
  xrs = random_structure.xray_structure(
    unit_cell=(12,5,12,90,90,90),
    space_group_symbol="P1",
    n_scatterers=12,
    elements="random")
  fc = abs(xrs.structure_factors(d_min=1.5).f_calc())
  fc = fc.set_observation_type_xray_amplitude()
  mtz_file = "tmp_massage_in.mtz"
  fc.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)
  massage_data.run(
    args=[
      mtz_file,
      "aniso.action=None",
      "outlier.action=None",
      "symmetry.action=twin",
      "twin_law='l,-k,h'",
      "fraction=0.3",
      "hklout=tmp_massage_twinned.mtz",
    ],
    out=null_out())
  assert op.isfile("tmp_massage_twinned.mtz")
  mtz_in = file_reader.any_file("tmp_massage_twinned.mtz")
  fc_twin = mtz_in.file_server.miller_arrays[0].f_sq_as_f()
  fc_twin, fc_tmp = fc_twin.common_sets(other=fc)
  for hkl, f1, f2 in zip(fc_tmp.indices(), fc_tmp.data(), fc_twin.data()) :
    if (abs(hkl[0]) != abs(hkl[2])) :
      assert not approx_equal(f1, f2, eps=0.01, out=null_out()), (hkl, f1, f2)
  massage_data.run(
    args=[
      mtz_file,
      "aniso.action=None",
      "outlier.action=None",
      "symmetry.action=twin",
      "twin_law='l,-k,h'",
      "fraction=0.3",
      "hklout=tmp_massage_twinned.sca",
    ],
    out=null_out())
  assert op.isfile("tmp_massage_twinned.sca")
  massage_data.run(
    args=[
      "tmp_massage_twinned.mtz",
      "aniso.action=None",
      "outlier.action=None",
      "symmetry.action=detwin",
      "twin_law='l,-k,h'",
      "fraction=0.3",
      "hklout=tmp_massage_detwinned.mtz",
    ],
    out=null_out())
  mtz_in = file_reader.any_file("tmp_massage_detwinned.mtz")
  fc_detwin = mtz_in.file_server.miller_arrays[0].f_sq_as_f()
  fc_detwin, fc_tmp = fc_detwin.common_sets(other=fc)
  # XXX we appear to lose some accuracy here, possibly due to the use of
  # MTZ format
  for hkl, f1, f2 in zip(fc_tmp.indices(), fc_tmp.data(), fc_detwin.data()) :
    assert approx_equal(f1, f2, eps=0.01), hkl
Example #3
def group_chains_and_sequences (seq_file, pdb_file, **kwds) :
  from iotbx import file_reader
  seq_in = file_reader.any_file(seq_file,
    raise_sorry_if_errors=True,
    raise_sorry_if_not_expected_format=True)
  if (seq_in.file_type != "seq") :
    raise Sorry("Can't parse %s as a sequence file.")
  pdb_in = file_reader.any_file(pdb_file,
    raise_sorry_if_errors=True,
    raise_sorry_if_not_expected_format=True)
  if (pdb_in.file_type != "pdb") :
    raise Sorry("Can't parse %s as a PDB or mmCIF file.")
  kwds['pdb_hierarchy'] = pdb_in.file_object.hierarchy
  kwds['sequences'] = seq_in.file_object
  v = validation(**kwds)
  chain_to_sequence_mappings = {}
  sequence_to_chain_mappings = {}
  for chain in v.chains :
    seq_id = chain.sequence_id
    chain_id = chain.chain_id
    if (seq_id is None) :
      raise Sorry("Can't map chain %s to a sequence in %s." % (chain_id,
        seq_file))
    sequence = seq_in.file_object[seq_id].sequence
    if (chain_id in chain_to_sequence_mappings) :
      if (chain_to_sequence_mappings[chain_id] != sequence) :
        raise Sorry("Multiple unique chains named '%s'" % chain_id)
    else :
      chain_to_sequence_mappings[chain_id] = sequence
    if (not sequence in sequence_to_chain_mappings) :
      sequence_to_chain_mappings[sequence] = []
    sequence_to_chain_mappings[sequence].append(chain_id)
  return sequence_to_chain_mappings
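A minimal usage sketch for the helper above (the file names are hypothetical):
# Hypothetical inputs; the helper returns a dict mapping each unique sequence
# string to the list of chain IDs sharing it.
mappings = group_chains_and_sequences("sequences.fa", "model.pdb")
for sequence, chain_ids in mappings.items():
  print("chains %s share one %d-residue sequence" % (",".join(chain_ids), len(sequence)))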
Example #4
def exercise_alignment () :
  aln1 = """\
>1mru_A
-----------------GSHMTTPSHLSD-----RYELGEILGFGGMSEVHLARDLRLHR
DVAVKVLRADLARDPSFYLRFRREAQNAAALNHPAIVAVYDTGEAETPAGPLPYIVMEYV
DGVTLRDIVHTEGPMTPKRAIEVIADACQALNFSHQNGIIHRDVKPANIMISATNAVKVM
DFGIARAIADSGNSVTQTAAVIGTAQYLSPEQARGDSVDARSDVYSLGCVLYEVLTGEPP
FTGDSPVSVAYQHVREDPIPPSARHEGLSADLDAVVLKALAKNPENRYQTAAEMRADLVR
VHNGEPPEAPKVLTDAERTSLLSSAAGNLSGPR
>2h34_A
MGSSHHHHHHSSGLVPRGSHMDGTAESREGTQFGPYRLRRLVGRGGMGDVYEAEDTVRER
IVALKLMSETLSSDPVFRTRMQREARTAGRLQEPHVVPIHDFGEID---GQL-YVDMRLI
NGVDLAAMLRRQGPLAPPRAVAIVRQIGSALDAAHAAGATHRDVKPENILVSADDFAYLV
DFGIASATTD--EKLTQLGNTVGTLYYMAPERFSESHATYRADIYALTCVLYECLTGSPP
YQGDQ-LSVMGAHINQAIPRPSTVRPGIPVAFDAVIARGMAKNPEDRYVTCGDLSA----
-----AAHAALATADQDRATDILR--------R"""
  open("seqs.aln", "w").write(aln1)
  f = any_file("seqs.aln")
  f.assert_file_type("aln")
  assert (f.file_object.names == ["1mru_A", "2h34_A"])
  aln2 = """\
MUSCLE (3.8) multiple sequence alignment


1mru_A          -----------------GSHMTTPSHLSD-----RYELGEILGFGGMSEVHLARDLRLHR
2h34_A          MGSSHHHHHHSSGLVPRGSHMDGTAESREGTQFGPYRLRRLVGRGGMGDVYEAEDTVRER
                                 ****  .:   :      * *  ::* ***.:*: * *    *

1mru_A          DVAVKVLRADLARDPSFYLRFRREAQNAAALNHPAIVAVYDTGEAETPAGPLPYIVMEYV
2h34_A          IVALKLMSETLSSDPVFRTRMQREARTAGRLQEPHVVPIHDFGEID---GQL-YVDMRLI
                 **:*::   *: ** *  *:.***..*. *: * :*.::* ** :   * * *: *  :"""
  open("seqs.aln", "w").write(aln2)
  f = any_file("seqs.aln")
  f.assert_file_type("aln")
  assert (f.file_object.names == ["1mru_A", "2h34_A"])
Example #5
def exercise () :
  #
  # Test command-line program
  #
  pdb_in, mtz_in = make_inputs()
  pdb_file = file_reader.any_file(pdb_in, force_type="pdb")
  hierarchy = pdb_file.file_object.hierarchy
  old_ligand = None
  for chain in hierarchy.only_model().chains() :
    if (chain.id != "B") : continue
    for residue_group in chain.residue_groups() :
      atom_group = residue_group.only_atom_group()
      if (atom_group.resname == "ACT") :
        old_ligand = atom_group.detached_copy()
        residue_group.remove_atom_group(atom_group)
        break
  assert old_ligand is not None
  open("tst_ligand_ncs_start.pdb", "w").write(hierarchy.as_pdb_string(
    crystal_symmetry=pdb_file.file_object.crystal_symmetry()))
  args = [
    "tst_ligand_ncs_start.pdb",
    mtz_in,
    "ligand_code=ACT",
  ]
  from mmtbx.command_line import apply_ncs_to_ligand
  if op.isfile("ncs_ligands.pdb") :
    os.remove("ncs_ligands.pdb")
  result = apply_ncs_to_ligand.run(args=args, out=null_out())
  assert result.n_ligands_new == 1
  assert op.isfile("ncs_ligands.pdb")
  pdb_out = file_reader.any_file("ncs_ligands.pdb", force_type="pdb")
  hierarchy_new = pdb_out.file_object.hierarchy
  new_ligand = None
  for chain in hierarchy_new.only_model().chains() :
    if (chain.id != "B") : continue
    for residue_group in chain.residue_groups() :
      atom_group = residue_group.only_atom_group()
      if (atom_group.resname == "ACT") :
        new_ligand = atom_group.detached_copy()
  assert new_ligand is not None
  rmsd = old_ligand.atoms().extract_xyz().rms_difference(
    new_ligand.atoms().extract_xyz())
  assert (rmsd < 0.5)
  #
  # Unit tests
  #
  import mmtbx.ncs.ligands
  operators = mmtbx.ncs.ligands.find_ncs_operators(hierarchy,
    log=null_out())
  assert len(operators) == 1
  group_ops = operators[0]
  assert len(group_ops) == 2
  assert (len(group_ops[0].selection) == 7)
  for g_op in group_ops:
    out = StringIO()
    g_op.show_summary(out=out, prefix=" ")
    assert out.getvalue().count("Rotation:") == 1
Example #6
def run_post_refinement (
    pdb_file,
    map_coeffs_file,
    output_file=None,
    params=None,
    f_map_label="2FOFCWT",
    diff_map_label="FOFCWT",
    model_map_label="F-model",
    write_model=True,
    out=None) :
  if (out is None) : out = sys.stdout
  if (params is None) :
    params = get_master_phil().fetch().extract().prune
  from iotbx import file_reader
  pdb_in = file_reader.any_file(pdb_file, force_type="pdb")
  pdb_in.assert_file_type("pdb")
  pdb_hierarchy = pdb_in.file_object.hierarchy
  pdb_hierarchy.atoms().reset_i_seq()
  # XXX this probably shouldn't be necessary
  pdb_hierarchy.atoms().set_chemical_element_simple_if_necessary()
  mtz_in = file_reader.any_file(map_coeffs_file, force_type="hkl")
  mtz_in.assert_file_type("hkl")
  f_map_coeffs = diff_map_coeffs = model_map_coeffs = None
  for array in mtz_in.file_server.miller_arrays :
    labels = array.info().labels
    if (labels[0] == f_map_label) :
      f_map_coeffs = array
    elif (labels[0] == diff_map_label) :
      diff_map_coeffs = array
    elif (labels[0] in [model_map_label, model_map_label + "(+)"]) :
      model_map_coeffs = array.average_bijvoet_mates()
  if (f_map_coeffs is None) :
    raise RuntimeError("2mFo-DFc map not found (expected labels %s)." %
      f_map_label)
  elif (diff_map_coeffs is None) :
    raise RuntimeError("mFo-DFc map not found (expected labels %s)." %
      diff_map_label)
  elif (model_map_coeffs is None) :
    raise RuntimeError("Fc map not found (expected labels %s)." %
      model_map_label)
  result = prune_model(
    f_map_coeffs=f_map_coeffs,
    diff_map_coeffs=diff_map_coeffs,
    model_map_coeffs=model_map_coeffs,
    pdb_hierarchy=pdb_hierarchy,
    params=params).process_residues(out=out)
  if (write_model) :
    if (output_file is None) :
      base_name = os.path.basename(pdb_file)
      output_file = os.path.splitext(base_name)[0] + "_pruned.pdb"
    f = open(output_file, "w")
    f.write("%s\n" % "\n".join(
      pdb_in.file_object.input.crystallographic_section()))
    f.write(pdb_hierarchy.as_pdb_string())
    f.close()
    result.output_file = output_file
  return result
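A hedged call sketch for the function above (placeholder file names; the default column labels are assumed to be present in the MTZ file):
# "refined.pdb" and "refined_map_coeffs.mtz" are placeholders for real
# phenix.refine outputs; with write_model=True the pruned model path is
# reported on the result object.
result = run_post_refinement(
  pdb_file="refined.pdb",
  map_coeffs_file="refined_map_coeffs.mtz")
print("pruned model written to %s" % result.output_file)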
Example #7
def run (args, out=sys.stdout) :
  from cctbx import french_wilson
  from iotbx import file_reader
  hkl_file = None
  sources = []
  interpreter = master_phil.command_line_argument_interpreter()
  for arg in args :
    if os.path.isfile(arg) :
      input_file = file_reader.any_file(arg)
      if (input_file.file_type == "hkl") :
        hkl_file = input_file
        sources.append(interpreter.process(arg="file_name=\"%s\"" % arg))
      elif (input_file.file_type == "phil") :
        sources.append(input_file.file_object)
    else :
      arg_phil = interpreter.process(arg=arg)
      sources.append(arg_phil)
  work_phil = master_phil.fetch(sources=sources)
  work_params = work_phil.extract()
  if (work_params.french_wilson.file_name is None) :
    if (hkl_file is None) :
      raise Usage("phenix.french_wilson data.mtz [params.eff] [options ...]")
    else :
      work_params.french_wilson.file_name = hkl_file.file_name
  elif (hkl_file is None) :
    hkl_file = file_reader.any_file(work_params.french_wilson.file_name)
  params = work_params.french_wilson
  xray_data_server = hkl_file.file_server
  crystal_symmetry = xray_data_server.miller_arrays[0].crystal_symmetry()
  if (crystal_symmetry is None) :
    raise Sorry("No crystal symmetry found.  This program requires an input "+
      "format with complete symmetry information.")
  unit_cell = xray_data_server.miller_arrays[0].unit_cell()
  if (unit_cell is None) :
    raise Sorry("No unit cell found.  This program requires an input "+
      "format with complete unit cell information.")
  i_obs = None
  i_obs = xray_data_server.get_xray_data(
    file_name = params.file_name,
    labels = params.intensity_labels,
    ignore_all_zeros = True,
    parameter_scope = 'french_wilson',
    parameter_name = 'intensity_labels')
  import cStringIO
  xray_data_server.err = cStringIO.StringIO()
  try :
    r_free_flags, test_flag_value = xray_data_server.get_r_free_flags(
      file_name = params.file_name,
      label = params.r_free_flags.label,
      test_flag_value = None,
      disable_suitability_test = False,
      parameter_scope = "french_wilson.r_free_flags")
  except Sorry, e :
    r_free_flags = None
def check_files(phil_scope, file_type, error_message):
  if (phil_scope is not None):
    if (isinstance(phil_scope, list)):
      for file_name in phil_scope:
        f = file_reader.any_file(file_name)
        if (f.file_type != file_type):
          raise Sorry(error_message)
    else:
      f = file_reader.any_file(phil_scope)
      if (f.file_type != file_type):
        raise Sorry(error_message)
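A hedged illustration of calling this helper (the parameter names below are made up for the sketch):
# Illustrative only: the first argument may be a single path or a list of
# paths extracted from a PHIL scope.
check_files(params.input.model_files, "pdb", "Please provide model file(s) in PDB format.")
check_files(params.input.map_file, "ccp4_map", "Please provide a CCP4-formatted map.")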
Example #9
def exercise_maps () :
  xplor_map = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/misc/cns.map",
    test=os.path.isfile)
  if xplor_map is not None :
    f = any_file(xplor_map)
    assert f.file_type == "xplor_map"
  ccp4_map = libtbx.env.under_dist(
    module_name="iotbx",
    path="ccp4_map/tst_input.map")
  f = any_file(ccp4_map)
  assert f.file_type == "ccp4_map"
def exercise_1():
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/1yjp_h.pdb",
    test=os.path.isfile)
  mtz_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/reflection_files/1yjp.mtz",
    test=os.path.isfile)
  if (None in [pdb_file, mtz_file]) :
    print "phenix_regression not found, skipping test"
    return False
  pdb_in = file_reader.any_file(pdb_file)
  hierarchy = pdb_in.file_object.hierarchy
  hierarchy.atoms().reset_i_seq()
  xrs = pdb_in.file_object.xray_structure_simple()
  mtz_in = file_reader.any_file(mtz_file)
  f_obs = mtz_in.file_server.miller_arrays[0]
  r_free = mtz_in.file_server.miller_arrays[1]
  r_free = r_free.customized_copy(data=(r_free.data()==1))
  fmodel = mmtbx.utils.fmodel_simple(
    f_obs=f_obs,
    r_free_flags=r_free,
    xray_structures=[xrs],
    scattering_table="n_gaussian")
  map_stats = real_space_correlation.map_statistics_for_fragment(
    fragment=hierarchy,
    fmodel=fmodel)
  assert approx_equal(map_stats.cc, 0.960, eps=0.01)
  edm = fmodel.electron_density_map()
  map1_coeffs = edm.map_coefficients("2mFo-DFc")
  map1 = map1_coeffs.fft_map(
    resolution_factor=0.25).apply_sigma_scaling().real_map()
  map2_coeffs = edm.map_coefficients("Fmodel")
  map2 = map2_coeffs.fft_map(
    resolution_factor=0.25).apply_sigma_scaling().real_map()
  xray_structure = fmodel.xray_structure
  map_stats2 = real_space_correlation.map_statistics_for_atom_selection(
    atom_selection=flex.bool(xrs.sites_cart().size(), True),
    map1=map1,
    map2=map2,
    xray_structure=xrs)
  assert approx_equal(map_stats2.cc, map_stats.cc, 0.01)
  # XXX other code outside cctbx depends on the current API - do not simply
  # change the test if this breaks!
  results = real_space_correlation.simple(
    fmodel=fmodel,
    pdb_hierarchy=hierarchy,
    log=null_out())
  assert isinstance(results, list)
  assert isinstance(results[0], group_args)
  assert (results[0].n_atoms == 1)
  assert (results[0].id_str == " A   GLY    1    N  ")
  return True
Example #11
def get_sequence_n_copies_from_files (seq_file, pdb_file, **kwds) :
  from iotbx import file_reader
  seq_in = file_reader.any_file(seq_file,
    raise_sorry_if_errors=True,
    raise_sorry_if_not_expected_format=True)
  if (seq_in.file_type != "seq") :
    raise Sorry("Can't parse %s as a sequence file.")
  pdb_in = file_reader.any_file(pdb_file,
    raise_sorry_if_errors=True,
    raise_sorry_if_not_expected_format=True)
  if (pdb_in.file_type != "pdb") :
    raise Sorry("Can't parse %s as a PDB or mmCIF file.")
  kwds['pdb_hierarchy'] = pdb_in.file_object.hierarchy
  kwds['sequences'] = seq_in.file_object
  return get_sequence_n_copies(**kwds)
Example #12
def run(args=(), params=None, out=sys.stdout):
  from iotbx.pdb.remediation import remediator
  from iotbx import file_reader
  if (params is None) :
    interpreter = master_phil.command_line_argument_interpreter()
    pdb_file = None
    sources = []
    for arg in args :
      if os.path.isfile(arg) :
        input_file = file_reader.any_file(arg)
        if (input_file.file_type == "pdb") :
          pdb_file = input_file
          sources.append(interpreter.process(arg="file_name=\"%s\"" % arg))
      else :
        arg_phil = interpreter.process(arg=arg)
        sources.append(arg_phil)
    work_phil = master_phil.fetch(sources=sources)
    work_params = work_phil.extract()
  else : # XXX for phenix GUI
    work_params = params
    if (work_params.remediator.output_file is None) :
      base, ext = os.path.splitext(work_params.remediator.file_name)
      work_params.remediator.output_file = base + "_remediated.pdb"
  if (work_params.remediator.file_name is None) :
    if (pdb_file is None) :
      summary = remediator.get_summary()
      raise Usage(summary)
    else :
      work_params.remediator.file_name = pdb_file.file_name
  params = work_params.remediator
  remediator.remediator(params)
  return work_params.remediator.output_file
def generate_magnessium_inputs (file_base="mg_frag", anonymize=True) :
  """
  Creates a fake model and reflection data for a structure containing magnesium
  ions.

  Parameters
  ----------
  file_base : str, optional
  anonymize : bool, optional
      Replace all ions in the returned pdb file with waters.

  Returns
  -------
  mtz_path : str
  pdb_path : str
  """
  pdb_file = write_pdb_input_magnessium_binding (file_base=file_base)
  mtz_file = generate_mtz_file(
    file_base=file_base,
    d_min=1.5)
  assert os.path.isfile(pdb_file) and os.path.isfile(mtz_file)
  if anonymize:
    pdb_in = any_file(pdb_file)
    hierarchy = pdb_in.file_object.hierarchy
    hierarchy, n = anonymize_ions(hierarchy, log=null_out())
    pdb_file = file_base + "_hoh.pdb"
    hierarchy.write_pdb_file(
      file_name=pdb_file,
      crystal_symmetry=pdb_in.file_object.crystal_symmetry())
    assert os.path.isfile(pdb_file)
  return os.path.abspath(mtz_file), os.path.abspath(pdb_file)
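A short usage sketch for the generator above:
# With anonymize=True the returned PDB file has its ions replaced by waters.
mtz_path, pdb_path = generate_magnessium_inputs(file_base="mg_frag", anonymize=True)
assert os.path.isfile(mtz_path) and os.path.isfile(pdb_path)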
def run (args, out=sys.stdout) :
  import optparse
  if (len(args) == 0) or ("--help" in args) :
    raise Usage("""
mmtbx.validation_summary model.pdb

Prints a brief summary of validation criteria, including Ramachandran
statistics, rotamer outliers, clashscore, C-beta deviations, plus R-factors
and RMS(bonds)/RMS(angles) if found in PDB header.  (This is primarily used
for evaluating the output of refinement tests; general users are advised to
run phenix.model_vs_data or the validation GUI.)
""")
  parser = optparse.OptionParser()
  options, args = parser.parse_args(args)
  pdb_file = args[0]
  if (not os.path.isfile(pdb_file)) :
    raise Sorry("Not a file: %s" % pdb_file)
  from iotbx.file_reader import any_file
  pdb_in = any_file(pdb_file, force_type="pdb").check_file_type("pdb")
  hierarchy = pdb_in.file_object.hierarchy
  xrs = pdb_in.file_object.input.xray_structures_simple()
  s = None
  extra = ""
  if (len(xrs) == 1) :
    s = summary(pdb_file=pdb_file)
  else :
    s = ensemble(pdb_hierarchy=hierarchy,
      n_models=len(xrs))
    extra = " (%d models)" % len(xrs)
  print >> out, ""
  print >> out, "Validation summary for %s%s:" % (pdb_file, extra)
  s.show(out=out, prefix="  ", show_percentiles=True)
  print >> out, ""
  return s
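A minimal sketch of driving the same entry point from Python (the model file name is a placeholder):
# "model.pdb" is a placeholder path; run() prints the summary to `out` and
# returns the summary (or ensemble) object for further inspection.
s = run(args=["model.pdb"], out=sys.stdout)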
Example #15
def exercise_heavy () :
  from mmtbx.regression import make_fake_anomalous_data
  from mmtbx.command_line import validate_waters
  import mmtbx.ions.utils
  from iotbx.file_reader import any_file
  file_base = "tst_validate_waters_1"
  pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(file_base=file_base)
  mtz_file = make_fake_anomalous_data.generate_mtz_file(
    file_base="tst_validate_waters_1",
    d_min=1.5,
    anomalous_scatterers=[
      group_args(selection="element CD", fp=-0.29, fdp=2.676),
      group_args(selection="element CL", fp=0.256, fdp=0.5),
    ])
  pdb_in = any_file(pdb_file)
  hierarchy = pdb_in.file_object.hierarchy
  hierarchy, n = mmtbx.ions.utils.anonymize_ions(hierarchy, log=null_out())
  hierarchy.write_pdb_file("%s_start.pdb" % file_base,
    crystal_symmetry=pdb_in.file_object.crystal_symmetry())
  args = ["tst_validate_waters_1_start.pdb", "tst_validate_waters_1.mtz",
    "skip_twin_detection=True"]
  results = validate_waters.run(args=args, out=null_out())
  out = StringIO()
  results.show(out=out)
  s = easy_pickle.dumps(results)
  r2 = easy_pickle.loads(s)
  out2 = StringIO()
  r2.show(out=out2)
  assert not show_diff(out.getvalue(), out2.getvalue())
  assert (results.n_bad >= 1) and (results.n_heavy == 2)
Example #16
def extract_phenix_refine_map_coeffs (mtz_file, limit_arrays=None) :
  assert (limit_arrays is None) or (isinstance(limit_arrays, list))
  if not os.path.isfile(mtz_file) :
    raise Sorry("No map coefficients are available for conversion.")
  mtz_in = file_reader.any_file(mtz_file)
  mtz_in.assert_file_type("hkl")
  miller_arrays = mtz_in.file_server.miller_arrays
  assert len(miller_arrays) > 0
  map_names = {"2FOFCWT" : "2mFo-DFc",
               "FOFCWT" : "mFo-DFc",
               "2FOFCWT_no_fill" : "2mFo-DFc_no_fill",
               "FOFCWT_no_fill" : "mFo-DFc_no_fill"}
  output_arrays = []
  for miller_array in miller_arrays :
    if miller_array.is_complex_array() :
      labels = miller_array.info().label_string()
      if labels.startswith("F-model") :
        continue
      if (limit_arrays is not None) and (not labels in limit_arrays) :
        continue
      f_label = miller_array.info().labels[0]
      map_name = map_names.get(f_label)
      if map_name is None :
        map_name = f_label
      output_arrays.append((miller_array, map_name))
  return output_arrays
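A hedged sketch of downstream use, assuming a phenix.refine output MTZ file exists at a placeholder path:
# Each returned entry pairs a complex Miller array with the map name assigned above.
for map_coeffs, map_name in extract_phenix_refine_map_coeffs("refine_001.mtz"):
  print("%s: %d coefficients" % (map_name, map_coeffs.indices().size()))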
 def import_r_free_flags (self, F) :
   params = self.params.r_free_flags
   out = self.out
   from iotbx import file_reader
   rfree_in = file_reader.any_file(params.file_name)
   rfree_in.assert_file_type("hkl")
   hkl_server = rfree_in.file_server
   r_free_raw, flag_value = hkl_server.get_r_free_flags(
     file_name=None,
     label=params.label,
     test_flag_value=None,
     parameter_scope="simulate_data.r_free_flags",
     disable_suitability_test=False)
   r_free = r_free_raw.customized_copy(data=r_free_raw.data() == flag_value)
   r_free = r_free.map_to_asu().common_set(F)
   print >> out, "  Using R-free flags from %s:%s" % (rfree_in.file_name,
     r_free_raw.info().label_string())
   if (F.data().size() != r_free.data().size()) :
     n_missing = F.data().size() - r_free.data().size()
     assert (n_missing > 0)
     if (params.missing_flags == "discard") :
       print >> out, "    discarding %d amplitudes without R-free flags" % \
         n_missing
       F = F.common_set(r_free)
     else :
       print >> out, "    generating missing R-free flags for %d reflections" %\
         n_missing
       missing_set = F.lone_set(r_free)
       missing_flags = missing_set.generate_r_free_flags(
         fraction=r_free.data().count(True) / r_free.data().size(),
         max_free=None,
         use_lattice_symmetry=True)
       r_free = r_free.concatenate(other=missing_flags)
   assert (F.data().size() == r_free.data().size())
   return F, r_free
def exercise_cns_input () :
  from mmtbx.regression import make_fake_anomalous_data
  pdb_file, mtz_file = make_fake_anomalous_data.generate_cd_cl_inputs(
    file_base="tst_cmdline_cns")
  from iotbx.file_reader import any_file
  mtz_in = any_file("tst_cmdline_cns.mtz")
  f_obs = mtz_in.file_server.miller_arrays[0].average_bijvoet_mates()
  flags = mtz_in.file_server.miller_arrays[1].average_bijvoet_mates()
  f = open("tst_cmdline_cns.hkl", "w")
  out = StringIO()
  f_obs.export_as_cns_hkl(
    file_object=out,
    r_free_flags=flags)
  # get rid of embedded symmetry
  for line in out.getvalue().splitlines() :
    if (not "{" in line) :
      f.write("%s\n" % line)
  f.close()
  cmdline = mmtbx.command_line.load_model_and_data(
    args=["tst_cmdline_cns.pdb", "tst_cmdline_cns.hkl"],
    master_phil=mmtbx.command_line.generic_simple_input_phil(),
    process_pdb_file=False,
    create_fmodel=True,
    out=null_out())
  out = StringIO()
  cmdline.crystal_symmetry.show_summary(f=out)
  assert (out.getvalue() == """\
Unit cell: (21.362, 23.436, 23.594, 90, 90, 90)
Space group: P 1 (No. 1)
"""), out.getvalue()
def exercise():
    for module in ["reduce", "probe", "phenix_regression"]:
        if not libtbx.env.has_module(module):
            print "%s not available, skipping" % module
            return
    from mmtbx.command_line import validation_summary
    from iotbx import file_reader
    import iotbx.pdb.hierarchy

    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent", test=os.path.isfile
    )
    out = StringIO()
    summary = validation_summary.run(args=[regression_pdb], out=out)
    assert approx_equal(summary.clashscore, 13.597, eps=0.0001)
    ss = easy_pickle.dumps(summary)
    sss = easy_pickle.loads(ss)
    out_1 = StringIO()
    out_2 = StringIO()
    summary.show(out=out_1)
    sss.show(out=out_2)
    assert out_1.getvalue() == out_2.getvalue()
    pdb_in = file_reader.any_file(regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    new_hierarchy = iotbx.pdb.hierarchy.root()
    for i in range(5):
        model = hierarchy.only_model().detached_copy()
        model.id = str(i + 1)
        new_hierarchy.append_model(model)
    open("tst_validation_summary.pdb", "w").write(new_hierarchy.as_pdb_string())
    out2 = StringIO()
    summary = validation_summary.run(args=["tst_validation_summary.pdb"], out=out2)
    assert type(summary).__name__ == "ensemble"
    print "OK"
def get_rotamers (file_name) :
  pdb_in = file_reader.any_file(file_name)
  hierarchy = pdb_in.file_object.hierarchy
  validate = rotalyze.rotalyze(pdb_hierarchy=hierarchy,
    data_version="8000",
    outliers_only=False)
  return [ (r.id_str(), r.rotamer_name) for r in validate.results ]
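A minimal sketch of comparing rotamer assignments between two models with the helper above (file names are hypothetical):
# Each call returns a list of (id_str, rotamer_name) tuples in hierarchy order.
rotamers_start = get_rotamers("model_start.pdb")
rotamers_final = get_rotamers("model_final.pdb")
n_changed = sum(1 for a, b in zip(rotamers_start, rotamers_final) if a[1] != b[1])
print("%d residues changed rotamer" % n_changed)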
Example #21
def validate_params(params):

  if ( (params.input.map_1 is None) or (params.input.map_2 is None) ):
    raise Sorry('Two CCP4-formatted maps are required.')

  # check files
  p = [params.input.map_1, params.input.map_2]
  maps = [None, None]
  for i in xrange(2):
    maps[i] = file_reader.any_file(p[i])
    if (maps[i].file_type != 'ccp4_map'):
      raise Sorry('Please input a CCP4-formatted map for %s.' % p[i])

  # check symmetry
  m1 = maps[0].file_object
  m2 = maps[1].file_object
  cs1 = crystal.symmetry(m1.unit_cell().parameters(), m1.space_group_number)
  cs2 = crystal.symmetry(m2.unit_cell().parameters(), m2.space_group_number)
  if (cs1.is_similar_symmetry(cs2) is False):
    raise Sorry('The symmetry of the two maps is not similar.')

  # check maps
  m1 = m1.map_data()
  m2 = m2.map_data()
  if ( (m1.accessor().all() != m2.accessor().all()) or
       (m1.accessor().focus() != m2.accessor().focus()) or
       (m1.accessor().origin() != m2.accessor().origin()) ):
    raise Sorry('The two maps are not similar.')

  return True
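A hedged sketch of invoking the validator above (the params object is assumed to carry input.map_1 and input.map_2 paths):
# validate_params() raises Sorry on any problem and returns True otherwise.
if validate_params(params):
  print("the two maps share compatible gridding and similar symmetry")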
Example #22
 def __init__(self,pdb_file,hklmtz_file,
              detail,high_resolution=None,mdb_document=None,pdb_code=None,
              do_flips=False) :
   assert detail in ['file','residue'],detail
   assert type(do_flips) == bool
   self.pdb_file = pdb_file
   self.hklmtz_file = hklmtz_file
   self.detail = detail
   self.pdb_code = pdb_code
   self.high_resolution = high_resolution
   self.do_flips = do_flips
   if not pdb_code : self.pdb_code = 'N/A'
   pdb_in = file_reader.any_file(pdb_file)
   self.hierarchy = pdb_in.file_object.hierarchy
   args = [self.pdb_file]
   if self.hklmtz_file : args.append(self.hklmtz_file)
   self.cmdline = load_model_and_data(
     args=args,
     master_phil=generate_master_phil_with_inputs(""),
     require_data=False,
     create_fmodel=True,
     process_pdb_file=True,
     prefer_anomalous=True)
   # keys are res ids and values are MDBResidue objects.
   if self.detail == 'residue' :
     self.initiate_residues()
   self.set_mdb_document(mdb_document)
Example #23
def load_all_models_in_directory (dir_name,
    limit_extensions=True,
    recursive=False) :
  """
  Load all models in the specified directory, returning a list of file names
  and iotbx.file_reader objects.
  """
  from iotbx.file_reader import any_file, guess_file_type
  assert os.path.isdir(dir_name)
  file_names_and_objects = []
  for file_name in os.listdir(dir_name) :
    full_path = os.path.join(dir_name, file_name)
    if os.path.isdir(full_path) and recursive :
      file_names_and_objects.extend(
        load_all_models_in_directory(dir_name=full_path,
          limit_extensions=limit_extensions,
          recursive=True))
    elif os.path.isfile(full_path) :
      if (limit_extensions) and (guess_file_type(full_path) != "pdb") :
        continue
      input_file = any_file(full_path,
        raise_sorry_if_not_expected_format=True)
      if (input_file.file_type == "pdb") :
        file_names_and_objects.append((full_path, input_file.file_object))
  return file_names_and_objects
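A usage sketch under the assumption that a directory of PDB files exists at a placeholder path:
# "./models" is a placeholder; each entry pairs the file path with the iotbx
# file object, whose .hierarchy gives the parsed model.
for file_name, pdb_object in load_all_models_in_directory("./models", recursive=True):
  print("%s: %d atoms" % (file_name, pdb_object.hierarchy.atoms().size()))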
Example #24
def exercise () :
  import mmtbx.regression
  from iotbx import file_reader
  from cStringIO import StringIO
  pdb_file = "tmp_em_rscc.pdb"
  map_file = "tmp_em_rscc.map"
  f = open(pdb_file, "w")
  for line in mmtbx.regression.model_1yjp.splitlines() :
    if line.startswith("ATOM") :
      f.write(line + "\n")
  f.close()
  pdb_in = file_reader.any_file(pdb_file).file_object
  symm = crystal.symmetry(
    space_group_symbol="P1",
    unit_cell=(30, 30, 30, 90, 90, 90))
  xrs = pdb_in.input.xray_structure_simple(crystal_symmetry=symm)
  xrs.scattering_type_registry(
    d_min=3.0,
    table="electron")
  fc = xrs.structure_factors(d_min=3.0).f_calc()
  fft_map = fc.fft_map(resolution_factor=1/3.).apply_sigma_scaling()
  i,j,k = fft_map.n_real()
  s = i//2
  f = i//2-1
  print i,j,k,s,f
  fft_map.as_ccp4_map(
    file_name=map_file,
    gridding_first=(-s,-s,-s),
    gridding_last=(f,f,f))
  out = StringIO()
  em_rscc.run(args=[pdb_file, map_file], out=out)
  for line in out.getvalue().splitlines():
    if line.find(" A  ")==-1: continue
    assert abs(float(line.split()[2])-1)<0.1
Example #25
def exercise_intensity_output () :
  if (os.path.isfile("tst_fmodel_anomalous.mtz")) :
    os.remove("tst_fmodel_anomalous.mtz")
  pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(
    file_base="tst_fmodel_anomalous")
  # phenix.fmodel (with wavelength)
  args = [
    pdb_file,
    "high_resolution=1.0",
    "wavelength=1.116",
    "obs_type=intensities",
    "type=real",
    "output.file_name=tst_fmodel_intensity.mtz",
    "r_free_flags_fraction=0.1",
  ]
  args2 = args + ["label=Imodel"]
  fmodel.run(args=args2, log=null_out())
  assert os.path.isfile("tst_fmodel_intensity.mtz")
  mtz_in = file_reader.any_file("tst_fmodel_intensity.mtz")
  assert mtz_in.file_server.miller_arrays[0].is_xray_intensity_array()
  try :
    fmodel.run(args=args, log=null_out())
  except Sorry :
    pass
  else :
    raise Exception_expected
  try :
    fmodel.run(args=args+["format=cns"], log=null_out())
  except Sorry :
    pass
  else :
    raise Exception_expected
Example #26
def exercise () :
  if (os.path.isfile("tst_fmodel_anomalous.mtz")) :
    os.remove("tst_fmodel_anomalous.mtz")
  pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(
    file_base="tst_fmodel_anomalous")
  # phenix.fmodel (with wavelength)
  args = [
    pdb_file,
    "high_resolution=1.0",
    "wavelength=1.116",
    "label=F",
    "type=real",
    "output.file_name=tst_fmodel_anomalous.mtz",
    "r_free_flags_fraction=0.1",
  ]
  fmodel.run(args=args, log=null_out())
  assert os.path.isfile("tst_fmodel_anomalous.mtz")
  mtz_in = file_reader.any_file("tst_fmodel_anomalous.mtz")
  array = mtz_in.file_server.miller_arrays[0]
  assert (array.anomalous_flag())
  anom_diffs = array.anomalous_differences()
  assert approx_equal(flex.max(anom_diffs.data()), 5.72, eps=0.01)
  # mmtbx.fmodel_simple
  result = easy_run.call(
    "mmtbx.fmodel_simple \"%s\" tst_fmodel_anomalous.mtz high_resolution=2.0"
      % pdb_file)
  print "OK"
Example #27
def run (args=(), params=None, out=sys.stdout) :
  assert (params is not None)
  seq_files = params.muscle.seq_file
  output_file = params.muscle.output_file
  if (output_file is None) or (output_file == "") :
    output_file = os.path.join(os.getcwd(), "muscle.aln")
  from iotbx import file_reader
  from iotbx.bioinformatics import any_sequence_format, sequence
  seqs = []
  for file_name in seq_files :
    if (file_name.endswith(".pdb") or file_name.endswith(".ent") or
        file_name.endswith(".pdb.gz") or file_name.endswith(".ent.gz")) :
      pdb_in = file_reader.any_file(file_name, force_type="pdb").file_object
      hierarchy = pdb_in.hierarchy
      first_model = hierarchy.models()[0]
      found_protein = False
      for chain in first_model.chains() :
        if chain.is_protein() :
          chain_seq = chain.as_padded_sequence()
          base_name = os.path.basename(file_name)
          seq_name = "%s_%s" % (os.path.splitext(base_name)[0], chain.id)
          seqs.append(sequence(chain_seq, seq_name))
          found_protein = True
      if (not found_protein) :
        raise Sorry(("The PDB file %s does not contain any recognizable "+
          "protein chains.") % file_name)
    else :
      try :
        seq_objects, non_compliant = any_sequence_format(file_name,
          assign_name_if_not_defined=True)
        seqs.extend(seq_objects)
      except Exception, e :
        raise Sorry(("Error parsing '%s' - not a recognizable sequence "+
          "format.  (Original message: %s)") % (file_name, str(e)))
def prepare_inputs (prefix="tst_build_alt_confs") :
  pdb_in = "%s_in.pdb" % prefix
  open(pdb_in, "w").write(pdb_raw)
  args = [
    pdb_in,
    "high_resolution=1.2",
    "type=real",
    "label=F",
    "add_sigmas=True",
    "r_free_flags_fraction=0.1",
    "random_seed=12345",
    "output.file_name=%s.mtz" % prefix,
  ]
  fmodel.run(args=args, log=null_out())
  pdb_file = file_reader.any_file(pdb_in)
  hierarchy = pdb_file.file_object.hierarchy
  xrs = pdb_file.file_object.xray_structure_simple()
  for chain in hierarchy.only_model().chains() :
    for residue_group in chain.residue_groups() :
      atom_groups = residue_group.atom_groups()
      if (len(atom_groups) > 1) :
        while (len(atom_groups) > 1) :
          residue_group.remove_atom_group(atom_groups[-1])
          del atom_groups[-1]
        for atom in residue_group.atoms() :
          atom.occ = 1.0
        atom_groups[0].altloc = ''
  assert hierarchy.atoms().extract_occ().all_eq(1.0)
  open("%s_start.pdb" % prefix, "w").write(
    hierarchy.as_pdb_string(crystal_symmetry=xrs))
Example #29
def extract_labels (params, out, parameter_scope="structure") :
  """
  Guess MTZ file column labels for experimental data and R-free flags.  Only
  invoked when this program is run from the command line, but the Phenix GUI
  does something similar.
  """
  for i, structure in enumerate(params.structure) :
    if (structure.mtz_file is None) :
      raise Sorry("Missing MTZ file for structure #%d." % (i+1))
    if ([structure.data_labels, structure.r_free_flags_label].count(None)>0) :
      mtz_file = file_reader.any_file(structure.mtz_file, force_type="hkl")
      mtz_file.assert_file_type("hkl")
      server = mtz_file.file_server
      file_name = mtz_file.file_name
      if (structure.data_labels is None) :
        print >>out, "Attempting to guess labels for %s..." % file_name
        data = server.get_xray_data(
          file_name=file_name,
          labels=None,
          ignore_all_zeros=True,
          parameter_scope=parameter_scope,
          parameter_name="data_labels")
        structure.data_labels = data.info().label_string()
      if (structure.r_free_flags_label is None) :
        print >>out, "Attempting to guess R-free label for %s..." % file_name
        rfree = server.get_r_free_flags(
          file_name=file_name,
          label=None,
          test_flag_value=None,
          disable_suitability_test=False,
          parameter_scope=parameter_scope+".r_free_flags")
        structure.r_free_flags_label = rfree[0].info().label_string()
def run (args=(), params=None, out=sys.stdout) :
  if (len(args) == 0) and (params is None) :
    raise Usage("iotbx.pdb.add_conformations model.pdb [selection=...]\n"+
      "Full parameters:\n" + master_phil.as_str())
  from iotbx import file_reader
  pdb_in = None
  if (params is None) :
    user_phil = []
    interpreter = master_phil.command_line_argument_interpreter(
      home_scope="")
    for arg in args :
      if os.path.isfile(arg) :
        f = file_reader.any_file(os.path.abspath(arg))
        if (f.file_type == "pdb") :
          pdb_in = f.file_object
          user_phil.append(libtbx.phil.parse(
            "add_conformations.pdb_file=\"%s\"" % f.file_name))
        elif (f.file_type == "phil") :
          user_phil.append(f.file_object)
        else :
          raise Sorry("Unknown file type '%s' (%s)" % (f.file_type, arg))
      else :
        try :
          arg_phil = interpreter.process(arg=arg)
        except RuntimeError, e :
          raise Sorry("Error parsing '%s': %s" % (arg, str(e)))
        else :
          user_phil.append(arg_phil)
Example #31
def get_inputs(args, log, master_params, validated):
    inputs = mmtbx.utils.process_command_line_args(
        args=args,
        master_params=master_params,
        suppress_symmetry_related_errors=True)
    params = inputs.params.extract()
    print params.model_file_name
    # Check model file
    if (len(inputs.pdb_file_names) == 0 and (params.model_file_name is None)):
        raise Sorry("No model file found.")
    elif (len(inputs.pdb_file_names) == 1):
        params.model_file_name = inputs.pdb_file_names[0]
    elif (len(inputs.pdb_file_names) > 1):
        #else:
        raise Sorry("Only one model file should be given")
    #
    # Check reflection file(s)
    reflection_files = inputs.reflection_files
    if (len(reflection_files) == 0):
        if (params.reflection_file_name is None):
            raise Sorry("No reflection file found.")
        else:
            hkl_in = file_reader.any_file(params.reflection_file_name,
                                          force_type="hkl")
            hkl_in.assert_file_type("hkl")
            reflection_files = [hkl_in.file_object]
    #
    # Get crystal symmetry
    crystal_symmetry = None
    crystal_symmetry = inputs.crystal_symmetry
    if (crystal_symmetry is None):
        crystal_symmetry = obtain_cs_if_gui_input(
            model_file_name=params.model_file_name,
            reflection_file_name=params.reflection_file_name)
    print >> log, "Working crystal symmetry after inspecting all inputs:"
    crystal_symmetry.show_summary(f=log, prefix="  ")
    #
    # Get data labels
    f_obs, r_free_flags = None, None
    rfs = reflection_file_utils.reflection_file_server(
        crystal_symmetry=crystal_symmetry,
        force_symmetry=True,
        reflection_files=reflection_files,
        err=StringIO())
    parameters = mmtbx.utils.data_and_flags_master_params().extract()
    if (params.data_labels is not None):
        parameters.labels = params.data_labels
    if (params.r_free_flags_labels is not None):
        parameters.r_free_flags.label = params.r_free_flags_labels
    determined_data_and_flags = mmtbx.utils.determine_data_and_flags(
        reflection_file_server=rfs,
        parameters=parameters,
        keep_going=True,
        working_point_group=crystal_symmetry.space_group(
        ).build_derived_point_group(),
        log=StringIO(),
        symmetry_safety_check=True)
    f_obs = determined_data_and_flags.f_obs
    if (params.data_labels is None):
        params.data_labels = f_obs.info().label_string()
    if (params.reflection_file_name is None):
        params.reflection_file_name = parameters.file_name
    r_free_flags = determined_data_and_flags.r_free_flags
    assert f_obs is not None
    print >> log, "Input data:"
    print >> log, "  Iobs or Fobs:", f_obs.info().labels
    if (r_free_flags is not None):
        print >> log, "  Free-R flags:", r_free_flags.info().labels
        params.r_free_flags_labels = r_free_flags.info().label_string()
    else:
        print >> log, "  Free-R flags: Not present"
    model_basename = os.path.basename(params.model_file_name.split(".")[0])
    if (len(model_basename) > 0 and params.output_file_name_prefix is None):
        params.output_file_name_prefix = model_basename
    new_params = master_params.format(python_object=params)
    print >> log, "*" * 79
    new_params.show()
    if (not validated):
        validate_params(params)
    pdb_input = iotbx.pdb.input(file_name=params.model_file_name)
    pdb_hierarchy = pdb_input.construct_hierarchy()
    xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)
    # DON'T USE:
    # xray_structure = pdb_input.xray_structure_simple()
    # because the atom order might be wrong
    mmtbx.utils.setup_scattering_dictionaries(
        scattering_table=params.scattering_table,
        xray_structure=xray_structure,
        d_min=f_obs.d_min())
    f_obs = f_obs.resolution_filter(d_min=params.high_resolution,
                                    d_max=params.low_resolution)
    if (r_free_flags is not None):
        r_free_flags = r_free_flags.resolution_filter(
            d_min=params.high_resolution, d_max=params.low_resolution)
    #
    # If data are anomalous
    if (f_obs.anomalous_flag()):
        f_obs, r_free_flags = prepare_f_obs_and_flags(
            f_obs=f_obs, r_free_flags=r_free_flags)
    return group_args(f_obs=f_obs,
                      r_free_flags=r_free_flags,
                      xray_structure=xray_structure,
                      pdb_hierarchy=pdb_hierarchy,
                      params=params)
def run(args, log=sys.stdout):
    if (len(args) == 0):
        print(legend, file=log)
        defaults(log=log)
        return
    #
    parsed = defaults(log=log)
    processed_args = mmtbx.utils.process_command_line_args(
        args=args, log=sys.stdout, master_params=parsed)
    params = processed_args.params.extract()
    reflection_files = processed_args.reflection_files
    if (len(reflection_files) == 0):
        if (params.hkl_file is None):
            raise Sorry("No reflection file found.")
        else:
            hkl_in = file_reader.any_file(params.hkl_file, force_type="hkl")
            hkl_in.assert_file_type("hkl")
            reflection_files = [hkl_in.file_object]
    crystal_symmetry = processed_args.crystal_symmetry
    if (crystal_symmetry is None):
        if (params.space_group is not None) and (params.unit_cell is not None):
            from cctbx import crystal
            crystal_symmetry = crystal.symmetry(
                space_group_info=params.space_group,
                unit_cell=params.unit_cell)
        else:
            raise Sorry("No crystal symmetry found.")
    if (len(processed_args.pdb_file_names) == 0):
        if (params.pdb_file is None):
            raise Sorry("No model file found.")
        else:
            pdb_file_names = [params.pdb_file]
    else:
        pdb_file_names = processed_args.pdb_file_names
    #
    rfs = reflection_file_utils.reflection_file_server(
        crystal_symmetry=crystal_symmetry,
        force_symmetry=True,
        reflection_files=reflection_files,
        err=StringIO())
    parameters = mmtbx.utils.data_and_flags_master_params().extract()
    parameters.labels = params.f_obs_label
    parameters.r_free_flags.label = params.r_free_flags_label
    determine_data_and_flags_result = mmtbx.utils.determine_data_and_flags(
        reflection_file_server=rfs,
        parameters=parameters,
        keep_going=True,
        log=StringIO())
    f_obs = determine_data_and_flags_result.f_obs
    print("Input data:")
    print("  Iobs or Fobs:", f_obs.info().labels)
    r_free_flags = determine_data_and_flags_result.r_free_flags
    print("  Free-R flags:", r_free_flags.info().labels)
    #
    parameters = mmtbx.utils.experimental_phases_params.extract()
    parameters.labels = params.hendrickson_lattman_coefficients_label
    experimental_phases_result = mmtbx.utils.determine_experimental_phases(
        reflection_file_server=rfs,
        parameters=parameters,
        log=StringIO(),
        parameter_scope="",
        working_point_group=None,
        symmetry_safety_check=True,
        ignore_all_zeros=True)
    if (experimental_phases_result is not None):
        print("  HL coefficients:", experimental_phases_result.info().labels)
    experimental_phases = extract_experimental_phases(
        experimental_phases=experimental_phases_result, f_obs=f_obs)
    #
    if (r_free_flags is None):
        r_free_flags = f_obs.array(data=flex.bool(f_obs.data().size(), False))
    #
    pdb_inp = mmtbx.utils.pdb_inp_from_multiple_files(pdb_file_names,
                                                      log=sys.stdout)
    model = mmtbx.model.manager(model_input=pdb_inp,
                                process_input=False,
                                crystal_symmetry=crystal_symmetry,
                                log=sys.stdout)
    if (model.get_number_of_models() > 1):  #XXX support multi-models
        raise Sorry("Multiple model file not supported in this tool.")
    # XXX Twinning not supported
    xray_structure = model.get_xray_structure()
    if (not xray_structure.unit_cell().is_similar_to(f_obs.unit_cell())):
        raise Sorry(
            "The unit cells in the model and reflections files are not " +
            "isomorphous.")
    print("Input model:")
    print("  number of atoms:", xray_structure.scatterers().size())
    fmodel = mmtbx.f_model.manager(xray_structure=xray_structure,
                                   r_free_flags=r_free_flags,
                                   f_obs=f_obs,
                                   abcd=experimental_phases)
    fmodel.update_all_scales(
        update_f_part1=True,
        remove_outliers=params.remove_f_obs_outliers,
        bulk_solvent_and_scaling=params.bulk_solvent_and_scaling)
    print("Overall statistics:")
    fmodel.info().show_all()
    #
    print("Output data:")
    if (params.output_file_name is not None):
        output_file_name = params.output_file_name
    else:
        pdb_file_bn = os.path.basename(pdb_file_names[0])
        hkl_file_bn = os.path.basename(reflection_files[0].file_name())
        try:
            pdb_file_prefix = pdb_file_bn[:pdb_file_bn.index(".")]
        except ValueError:
            pdb_file_prefix = pdb_file_bn
        try:
            hkl_file_prefix = hkl_file_bn[:hkl_file_bn.index(".")]
        except ValueError:
            hkl_file_prefix = hkl_file_bn
        output_file_name = "%s_%s.mtz" % (pdb_file_prefix, hkl_file_prefix)
    print("  file name:", output_file_name)
    print("  to see the contnt of %s:" % output_file_name)
    print("    phenix.mtz.dump %s" % output_file_name)
    out = open(output_file_name, "w")
    fmodel.export(out=out)
    out.close()
    print("All done.")
    return output_file_name

def get_symm(sg):
    # (branches for other space groups are omitted in this excerpt)
    if sg == 'C121':
        rot0 = rt_mx("x,y,z")
        rot1 = rt_mx("-x,y,-z")
        rot2 = rt_mx("x+1/2,y+1/2,z")
        rot3 = rt_mx("-x+1/2,y+1/2,-z")
        rt_mx_matrices = (rot0, rot1, rot2, rot3)

    else:
        print "%s not found\n" % sg
        sys.exit()

    return rt_mx_matrices


pdb_in = file_reader.any_file(pdb_file).file_object
pdb_hierarchy = pdb_in.construct_hierarchy()
xrs = pdb_in.xray_structure_simple()
rt_mx_matrices = get_symm(sg)
unit_cell = xrs.unit_cell()

import cctbx
from cctbx import uctbx
uc1 = cctbx.uctbx.unit_cell(parameters=uc)
#~ print uc1.parameters()
#~ uc1.show_parameters()
#~ print uc1.volume()

symm = pdb_in.crystal_symmetry()
space_group = symm.space_group()
Example #34
def run(args,
    out=sys.stdout,
    auto_extract_labels=True,
    use_current_directory_if_not_specified=False,
    warn=True):
  master_params = libtbx.phil.parse(master_phil_str,
    process_includes=True)
  if (len(args) == 0):
    print("""\
************************************************************************
  phenix.table_one - statistics harvesting for publication
************************************************************************

  note: this is somewhat difficult to configure on the command line at
        present; you may find it more convenient to use the PHENIX GUI.

""", file=out)
    print("# Parameter template for phenix.table_one:", file=out)
    master_params.show(out=out)
    print("# (the 'structure' scope may be copied as many times as ", file=out)
    print("#  necessary to handle multiple datasets.)", file=out)
    print("# Alternate usage:", file=out)
    print("#   phenix.table_one model.pdb data.mtz [logfile]*", file=out)
    return None
  if (warn):
    print("""
  note: this is somewhat difficult to configure on the command line at
        present; you may find it more convenient to use the PHENIX GUI.
    """, file=out)
    time.sleep(2)
  interpreter = libtbx.phil.command_line.argument_interpreter(
    master_phil=master_params,
    home_scope="table_one")
  file_phil = []
  cmdline_phil = []
  pdb_file = None
  mtz_file = None
  unmerged_data = None
  log_files = []
  for arg in args :
    if os.path.isfile(arg):
      f = file_reader.any_file(arg)
      if (f.file_type == "phil"):
        file_phil.append(f.file_object)
      elif (f.file_type == "pdb"):
        pdb_file = f.file_name
      elif (f.file_type == "hkl"):
        mtz_file = f.file_name
      elif (f.file_type == "txt"):
        log_files.append(f.file_name)
    else :
      if arg.startswith("unmerged_data="):
        unmerged_data = os.path.abspath("=".join(arg.split("=")[1:]))
        continue
      if arg.startswith("--"):
        arg = arg[2:] + "=True"
      try :
        arg_phil = interpreter.process(arg=arg)
      except RuntimeError :
        print("Ignoring unknown argument %s" % arg, file=out)
      else :
        cmdline_phil.append(arg_phil)
  working_phil = master_params.fetch(sources=file_phil+cmdline_phil)
  params = working_phil.extract()
  if (pdb_file is not None):
    if (len(params.table_one.structure) > 0):
      raise Sorry("You already have a structure defined in the parameter "+
        "file; to add structures, you should edit the parameters instead of "+
        "specifying additional PDB and data files on the command line.")
    if (mtz_file is None):
      raise Sorry("You have supplied a PDB file, but no corresponding MTZ "+
                  "file.")
    log_file_str = "\n".join([ "log_file=%s" % f for f in log_files ])
    structure_params = libtbx.phil.parse(structure_params_str)
    new_structure = structure_params.extract().structure[0]
    new_structure.pdb_file = pdb_file
    new_structure.mtz_file = mtz_file
    new_structure.unmerged_data = unmerged_data
    params.table_one.structure.append(new_structure)
  if auto_extract_labels :
    extract_labels(params.table_one, out=out)
  if use_current_directory_if_not_specified :
    if (params.table_one.output.directory is None):
      params.table_one.output.directory = os.getcwd()
  validate_params(params)
  if (params.table_one.multiprocessing.nproc is None):
    params.table_one.multiprocessing.nproc = 1
  final_phil = master_params.format(python_object=params)
  if params.table_one.output.verbose :
    print("", file=out)
    print("#Final effective parameters:", file=out)
    final_phil.show(out=out)
    print("#---end", file=out)
    print("", file=out)
  final_phil.show(out=open("table_one.eff", "w"))
  table1 = table_one(params.table_one, out=out)
  easy_pickle.dump("%s.pkl" % params.table_one.output.base_name, table1)
  table1.save_multiple(
    file_base=params.table_one.output.base_name,
    formats=params.table_one.output.format)
  return table1
Example #35
 def __init__(self,
               pdb_file,
               output_file=None,
               log=None,
               quiet=False,
               set_se_occ=True,
               remove_atoms_with_zero_occupancy=False):
   from iotbx.file_reader import any_file
   import iotbx.pdb
   if (log is None):
     log = null_out()
   pdb_in = any_file(pdb_file, force_type="pdb")
   pdb_in.assert_file_type("pdb")
   hierarchy = pdb_in.file_object.hierarchy
   if (len(hierarchy.models()) > 1):
     raise Sorry("Multi-MODEL PDB files are not supported.")
   n_unknown = 0
   all_atoms = hierarchy.atoms()
   cache = hierarchy.atom_selection_cache()
   # resname UNK is now okay (with some restrictions)
   known_sel = cache.selection("not (element X or resname UNX or resname UNL)")
   semet_sel = cache.selection("element SE and resname MSE")
   zero_occ_sel = all_atoms.extract_occ() == 0
   self.n_unknown = known_sel.count(False)
   self.n_semet = semet_sel.count(True)
   self.n_zero_occ = zero_occ_sel.count(True)
   keep_sel = known_sel
   modified = False
   if ((self.n_unknown > 0) or
       ((self.n_semet > 0) and (set_se_occ)) or
       (self.n_zero_occ > 0) and (remove_atoms_with_zero_occupancy)):
     modified = True
     if (output_file is None):
       output_file = pdb_file
   if (self.n_unknown > 0) and (not quiet):
     print >> log, "Warning: %d unknown atoms or ligands removed:" % \
       self.n_unknown
     for i_seq in (~known_sel).iselection():
       print >> log, "  %s" % all_atoms[i_seq].id_str()
   if (self.n_zero_occ > 0):
     msg = "Warning: %d atoms with zero occupancy present in structure:"
     if (remove_atoms_with_zero_occupancy):
       msg = "Warning: %d atoms with zero occupancy removed:"
       keep_sel &= ~zero_occ_sel
     if (not quiet):
       print >> log, msg % self.n_zero_occ
       for i_seq in zero_occ_sel.iselection():
         print >> log, "  %s" % all_atoms[i_seq].id_str()
   hierarchy_filtered = hierarchy.select(keep_sel)
   if (self.n_semet > 0) and (set_se_occ):
     for atom in hierarchy_filtered.atoms():
       if (atom.element == "SE") and (atom.fetch_labels().resname == "MSE"):
         if (atom.occ == 1.0):
           if (not quiet):
             print >> log, "Set occupancy of %s to 0.99" % atom.id_str()
           atom.occ = 0.99 # just enough to trigger occupancy refinement
   if (modified):
     f = open(output_file, "w")
     # if the input file is actually from the PDB, we need to preserve the
     # header information for downstream code.
     print >> f, "\n".join(pdb_in.file_object.input.title_section())
     print >> f, "\n".join(pdb_in.file_object.input.remark_section())
     print >> f, iotbx.pdb.format_cryst1_record(
       crystal_symmetry=pdb_in.file_object.crystal_symmetry())
     print >> f, hierarchy_filtered.as_pdb_string()
     f.close()
    def run(self, args, command_name, out=sys.stdout):
        command_line = (iotbx_option_parser(
            usage="%s [options]" % command_name,
            description='Example: %s data.mtz data.mtz ref_model.pdb' %
            command_name).option(
                None,
                "--show_defaults",
                action="store_true",
                help="Show list of parameters.")).process(args=args)

        cif_file = None
        processed_args = utils.process_command_line_args(
            args=args, log=sys.stdout, master_params=master_phil)
        params = processed_args.params
        if (params is None): params = master_phil
        self.params = params.extract().ensemble_probability
        pdb_file_names = processed_args.pdb_file_names
        if len(pdb_file_names) != 1:
            raise Sorry("Only one PDB structure may be used")
        pdb_file = file_reader.any_file(pdb_file_names[0])
        self.log = multi_out()
        self.log.register(label="stdout", file_object=sys.stdout)
        self.log.register(label="log_buffer",
                          file_object=StringIO(),
                          atexit_send_to=None)
        sys.stderr = self.log
        log_file = open(
            pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
            '_pensemble.log', "w")

        self.log.replace_stringio(old_label="log_buffer",
                                  new_label="log",
                                  new_file_object=log_file)
        utils.print_header(command_name, out=self.log)
        params.show(out=self.log)
        #
        f_obs = None
        r_free_flags = None
        reflection_files = processed_args.reflection_files

        if self.params.fobs_vs_fcalc_post_nll:
            if len(reflection_files) == 0:
                raise Sorry(
                    "Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

        if len(reflection_files) > 0:
            crystal_symmetry = processed_args.crystal_symmetry
            print('Reflection file : ',
                  processed_args.reflection_file_names[0],
                  file=self.log)
            utils.print_header("Model and data statistics", out=self.log)
            rfs = reflection_file_server(
                crystal_symmetry=crystal_symmetry,
                reflection_files=processed_args.reflection_files,
                log=self.log)

            parameters = extract_xtal_data.data_and_flags_master_params(
            ).extract()
            determine_data_and_flags_result = extract_xtal_data.run(
                reflection_file_server=rfs,
                parameters=parameters,
                keep_going=True)
            f_obs = determine_data_and_flags_result.f_obs
            number_of_reflections = f_obs.indices().size()
            r_free_flags = determine_data_and_flags_result.r_free_flags
            test_flag_value = determine_data_and_flags_result.test_flag_value
            if (r_free_flags is None):
                r_free_flags = f_obs.array(
                    data=flex.bool(f_obs.data().size(), False))

        # process PDB
        pdb_file.assert_file_type("pdb")
        #
        pdb_in = hierarchy.input(file_name=pdb_file.file_name)
        ens_pdb_hierarchy = pdb_in.construct_hierarchy()
        ens_pdb_hierarchy.atoms().reset_i_seq()
        ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
        number_structures = len(ens_pdb_xrs_s)
        print('Number of structures in ensemble : ',
              number_structures,
              file=self.log)

        # Calculate sigmas from input map only
        if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
            # process MTZ
            input_file = file_reader.any_file(
                self.params.ensemble_sigma_map_input)
            if input_file.file_type == "hkl":
                if input_file.file_object.file_type() != "ccp4_mtz":
                    raise Sorry("Only MTZ format accepted for map input")
                else:
                    mtz_file = input_file
            else:
                raise Sorry("Only MTZ format accepted for map input")
            miller_arrays = mtz_file.file_server.miller_arrays
            map_coeffs_1 = miller_arrays[0]
            #
            xrs_list = []
            for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                # get sigma levels from ensemble fc for each structure
                xrs = get_map_sigma(ens_pdb_hierarchy=ens_pdb_hierarchy,
                                    ens_pdb_xrs=ens_pdb_xrs,
                                    map_coeffs_1=map_coeffs_1,
                                    residue_detail=self.params.residue_detail,
                                    ignore_hd=self.params.ignore_hd,
                                    log=self.log)
                xrs_list.append(xrs)
            # write ensemble pdb file, occupancies as sigma level
            filename = pdb_file_names[0].split('/')[-1].replace(
                '.pdb',
                '') + '_vs_' + self.params.ensemble_sigma_map_input.replace(
                    '.mtz', '') + '_pensemble.pdb'
            write_ensemble_pdb(filename=filename,
                               xrs_list=xrs_list,
                               ens_pdb_hierarchy=ens_pdb_hierarchy)

        # Do full analysis vs Fobs
        else:
            model_map_coeffs = []
            fmodel = None
            # Get <fcalc>
            for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                ens_pdb_xrs.set_occupancies(1.0)
                if model == 0:
                    # If mtz not supplied get fobs from xray structure...
                    # Use input Fobs for scoring against nll
                    if self.params.fobs_vs_fcalc_post_nll:
                        dummy_fobs = f_obs
                    else:
                        if f_obs is None:
                            if self.params.fcalc_high_resolution is None:
                                raise Sorry(
                                    "Please supply high resolution limit or input mtz file."
                                )
                            dummy_dmin = self.params.fcalc_high_resolution
                            dummy_dmax = self.params.fcalc_low_resolution
                        else:
                            print(
                                'Supplied mtz used to determine high and low resolution cutoffs',
                                file=self.log)
                            dummy_dmax, dummy_dmin = f_obs.d_max_min()
                        #
                        dummy_fobs = abs(
                            ens_pdb_xrs.structure_factors(
                                d_min=dummy_dmin).f_calc())
                        dummy_fobs.set_observation_type_xray_amplitude()
                        # If an mtz is supplied, free flags are overwritten to prevent an array size error
                        r_free_flags = dummy_fobs.array(
                            data=flex.bool(dummy_fobs.data().size(), False))
                    #
                    fmodel = utils.fmodel_simple(
                        scattering_table="wk1995",
                        xray_structures=[ens_pdb_xrs],
                        f_obs=dummy_fobs,
                        target_name='ls',
                        bulk_solvent_and_scaling=False,
                        r_free_flags=r_free_flags)
                    f_calc_ave = fmodel.f_calc().array(
                        data=fmodel.f_calc().data() * 0).deep_copy()
                    # XXX Important to ensure scale is identical for each model and <model>
                    fmodel.set_scale_switch = 1.0
                    f_calc_ave_total = fmodel.f_calc().data().deep_copy()
                else:
                    fmodel.update_xray_structure(xray_structure=ens_pdb_xrs,
                                                 update_f_calc=True,
                                                 update_f_mask=False)
                    f_calc_ave_total += fmodel.f_calc().data().deep_copy()
                print('Model :', model + 1, file=self.log)
                print("\nStructure vs real Fobs (no bulk solvent or scaling)",
                      file=self.log)
                print('Rwork          : %5.4f ' % fmodel.r_work(),
                      file=self.log)
                print('Rfree          : %5.4f ' % fmodel.r_free(),
                      file=self.log)
                print('K1             : %5.4f ' % fmodel.scale_k1(),
                      file=self.log)
                fcalc_edm = fmodel.electron_density_map()
                fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type='Fc')
                fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(
                    column_root_label='Fc')
                if self.params.output_model_and_model_ave_mtz:
                    fcalc_mtz_dataset.mtz_object().write(
                        file_name=str(model + 1) + "_Fc.mtz")
                model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

            fmodel.update(f_calc=f_calc_ave.array(f_calc_ave_total /
                                                  number_structures))
            print("\nEnsemble vs real Fobs (no bulk solvent or scaling)",
                  file=self.log)
            print('Rwork          : %5.4f ' % fmodel.r_work(), file=self.log)
            print('Rfree          : %5.4f ' % fmodel.r_free(), file=self.log)
            print('K1             : %5.4f ' % fmodel.scale_k1(), file=self.log)

            # Get <Fcalc> map
            fcalc_ave_edm = fmodel.electron_density_map()
            fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(
                map_type='Fc').deep_copy()
            fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(
                column_root_label='Fc')
            if self.params.output_model_and_model_ave_mtz:
                fcalc_ave_mtz_dataset.mtz_object().write(file_name="aveFc.mtz")
            fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
            fcalc_ave_map_coeffs.apply_volume_scaling()
            fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
            fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)

            print("<Fcalc> Map Stats :", file=self.log)
            fcalc_ave_map_stats.show_summary(f=self.log)
            offset = fcalc_ave_map_stats.min()
            model_neg_ll = []

            number_previous_scatters = 0

            # Run through structure list again and get probability
            xrs_list = []
            for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                if self.params.verbose:
                    print('\n\nModel                   : ',
                          model + 1,
                          file=self.log)
                # Get model atom sigmas vs Fcalc
                fcalc_map = model_map_coeffs[model].fft_map()
                fcalc_map.apply_volume_scaling()
                fcalc_map_data = fcalc_map.real_map_unpadded()
                fcalc_map_stats = maptbx.statistics(fcalc_map_data)
                if self.params.verbose:
                    print("Fcalc map stats         :", file=self.log)
                    fcalc_map_stats.show_summary(f=self.log)

                xrs = get_map_sigma(
                    ens_pdb_hierarchy=ens_pdb_hierarchy,
                    ens_pdb_xrs=ens_pdb_xrs,
                    fft_map_1=fcalc_map,
                    model_i=model,
                    residue_detail=self.params.residue_detail,
                    ignore_hd=self.params.ignore_hd,
                    number_previous_scatters=number_previous_scatters,
                    log=self.log)
                fcalc_sigmas = xrs.scatterers().extract_occupancies()
                del fcalc_map
                # Get model atom sigmas vs <Fcalc>
                xrs = get_map_sigma(
                    ens_pdb_hierarchy=ens_pdb_hierarchy,
                    ens_pdb_xrs=ens_pdb_xrs,
                    fft_map_1=fcalc_ave_map_coeffs,
                    model_i=model,
                    residue_detail=self.params.residue_detail,
                    ignore_hd=self.params.ignore_hd,
                    number_previous_scatters=number_previous_scatters,
                    log=self.log)

                ### For testing other residue averaging options
                #print xrs.residue_selections

                fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
                # Probability of model given <model>
                prob = fcalc_ave_sigmas / fcalc_sigmas
                # XXX debug option
                if False:
                    for n, p in enumerate(prob):
                        print(' {0:5d} {1:5.3f}'.format(n, p), file=self.log)
                # Clamp probability to the range (0, 1]
                # XXX Make Histogram / more stats
                prob_lss_zero = flex.bool(prob <= 0)
                prob_grt_one = flex.bool(prob > 1)
                prob.set_selected(prob_lss_zero, 0.001)
                prob.set_selected(prob_grt_one, 1.0)
                xrs.set_occupancies(prob)
                xrs_list.append(xrs)
                sum_neg_ll = sum(-flex.log(prob))
                model_neg_ll.append((sum_neg_ll, model))
                if self.params.verbose:
                    print('Model probability stats :', file=self.log)
                    print(prob.min_max_mean().show(), file=self.log)
                    print('  Count < 0.0 : ',
                          prob_lss_zero.count(True),
                          file=self.log)
                    print('  Count > 1.0 : ',
                          prob_grt_one.count(True),
                          file=self.log)

                # For averaging by residue
                number_previous_scatters += ens_pdb_xrs.sites_cart().size()

            # write ensemble pdb file, occupancies as sigma level
            write_ensemble_pdb(
                filename=pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
                '_pensemble.pdb',
                xrs_list=xrs_list,
                ens_pdb_hierarchy=ens_pdb_hierarchy)

            # XXX Test ordering models by nll
            # XXX Test removing nth percentile atoms
            if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
                for percentile in [1.0, 0.975, 0.95, 0.9, 0.8, 0.6, 0.2]:
                    model_neg_ll = sorted(model_neg_ll)
                    f_calc_ave_total_reordered = None
                    print_list = []
                    for i_neg_ll in model_neg_ll:
                        xrs = xrs_list[i_neg_ll[1]]
                        nll_occ = xrs.scatterers().extract_occupancies()

                        # Set q=0 nth percentile atoms
                        sorted_nll_occ = sorted(nll_occ, reverse=True)
                        number_atoms = len(sorted_nll_occ)
                        percentile_prob_cutoff = sorted_nll_occ[
                            int(number_atoms * percentile) - 1]
                        cutoff_selections = flex.bool(
                            nll_occ < percentile_prob_cutoff)
                        cutoff_nll_occ = flex.double(nll_occ.size(),
                                                     1.0).set_selected(
                                                         cutoff_selections,
                                                         0.0)
                        #XXX Debug
                        if False:
                            print('\nDebug')
                            for x in range(len(cutoff_selections)):
                                print(cutoff_selections[x], nll_occ[x],
                                      cutoff_nll_occ[x])
                            print(percentile)
                            print(percentile_prob_cutoff)
                            print(cutoff_selections.count(True))
                            print(cutoff_selections.size())
                            print(cutoff_nll_occ.count(0.0))
                            print('Count q = 1           : ',
                                  cutoff_nll_occ.count(1.0))
                            print('Count scatterers size : ',
                                  cutoff_nll_occ.size())

                        xrs.set_occupancies(cutoff_nll_occ)
                        fmodel.update_xray_structure(xray_structure=xrs,
                                                     update_f_calc=True,
                                                     update_f_mask=True)

                        if f_calc_ave_total_reordered is None:
                            f_calc_ave_total_reordered = fmodel.f_calc().data(
                            ).deep_copy()
                            f_mask_ave_total_reordered = fmodel.f_masks(
                            )[0].data().deep_copy()
                            cntr = 1
                        else:
                            f_calc_ave_total_reordered += fmodel.f_calc().data(
                            ).deep_copy()
                            f_mask_ave_total_reordered += fmodel.f_masks(
                            )[0].data().deep_copy()
                            cntr += 1
                        fmodel.update(
                            f_calc=f_calc_ave.array(
                                f_calc_ave_total_reordered / cntr).deep_copy(),
                            f_mask=f_calc_ave.array(
                                f_mask_ave_total_reordered / cntr).deep_copy())

                        # Update solvent and scale
                        # XXX Will need to apply_back_trace on latest version
                        fmodel.set_scale_switch = 0
                        fmodel.update_all_scales()

                        # Reset occupancies for output
                        xrs.set_occupancies(nll_occ)
                        # k1 updated vs Fobs
                        if self.params.fobs_vs_fcalc_post_nll:
                            print_list.append([
                                cntr, i_neg_ll[0], i_neg_ll[1],
                                fmodel.r_work(),
                                fmodel.r_free()
                            ])

                    # Order models by nll and print summary
                    print(
                        '\nModels ranked by nll <Fcalc> R-factors recalculated',
                        file=self.log)
                    print('Percentile cutoff : {0:5.3f}'.format(percentile),
                          file=self.log)
                    xrs_list_sorted_nll = []
                    print('      |      NLL     <Rw>     <Rf>    Ens Model',
                          file=self.log)
                    for info in print_list:
                        print(' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.
                              format(
                                  info[0],
                                  info[1],
                                  info[3],
                                  info[4],
                                  info[2] + 1,
                              ),
                              file=self.log)
                        xrs_list_sorted_nll.append(xrs_list[info[2]])

                # Output nll ordered ensemble

                write_ensemble_pdb(
                    filename='nll_ordered_' +
                    pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
                    '_pensemble.pdb',
                    xrs_list=xrs_list_sorted_nll,
                    ens_pdb_hierarchy=ens_pdb_hierarchy)
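
The per-atom probability above reduces to a short flex-array computation; here is a standalone sketch of just that clamping and scoring step, with made-up sigma values standing in for the get_map_sigma output:

from scitbx.array_family import flex

# illustrative sigma levels; in run() these come from get_map_sigma
fcalc_sigmas = flex.double([1.2, 0.8, 0.5, 2.0])       # model vs its own Fcalc map
fcalc_ave_sigmas = flex.double([0.9, 0.9, -0.1, 2.5])  # model vs <Fcalc> map

prob = fcalc_ave_sigmas / fcalc_sigmas
# clamp to (0, 1], exactly as in the run() method above
prob.set_selected(prob <= 0, 0.001)
prob.set_selected(prob > 1, 1.0)
# per-model score used for the nll ranking
sum_neg_ll = flex.sum(-flex.log(prob))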
Example #37
def run2(args, log=sys.stdout):
    """
  Fetches pdb files and/or reflection data from the PDB.

  Parameters
  ----------
  args : list of str
  log : file, optional

  Returns
  -------
  str or list of str
      The downloaded file name, or a list of file names when more than
      one file was retrieved.
  """
    if len(args) < 1:
        raise Usage("""\
phenix.fetch_pdb [-x|-f|--all] [--mtz] [-q] ID1 [ID2, ...]

Command-line options:
  -x      Get structure factors (mmCIF file)
  -c      Get model file in mmCIF format
  -f      Get sequence (FASTA file)
  --all   Download all available data
  --mtz   Download structure factors and PDB file, and generate MTZ
  -q      suppress printed output
""")
    from iotbx.pdb.fetch import get_pdb
    quiet = False
    convert_to_mtz = False
    data_type = "pdb"
    format = "pdb"
    mirror = "rcsb"
    ids = []
    for arg in args:
        if (arg == "--all"):
            data_type = "all"
        elif (arg == "-x"):
            data_type = "xray"
        elif (arg == "-f"):
            data_type = "fasta"
        elif (arg == "-q"):
            quiet = True
        elif (arg == "--mtz"):
            convert_to_mtz = True
            data_type = "all"
        elif (arg == "-c"):
            format = "cif"
        elif (arg.startswith("--mirror=")):
            mirror = arg.split("=")[1]
            if (not mirror in ["rcsb", "pdbe", "pdbj"]):
                raise Sorry(
                    "Unrecognized mirror site '%s' (choices: rcsb, pdbe, pdbj)"
                    % mirror)
        else:
            ids.append(arg)
    if (len(ids) == 0):
        raise Sorry("No PDB IDs specified.")
    if (data_type != "all"):
        #mirror = "rcsb"
        files = []
        for id in ids:
            files.append(get_pdb(id, data_type, mirror, log, format=format))
        if (len(files) == 1):
            return files[0]
        return files
    else:
        files = []
        for id in ids:
            for data_type_, data_format in [("pdb", "pdb"), ("fasta", "pdb"),
                                            ("xray", "pdb"), ("pdb", "cif")]:
                files.append(
                    get_pdb(id, data_type_, mirror, log, format=data_format))
            if (convert_to_mtz):
                misc_args = [
                    "--merge", "--map_to_asu", "--extend_flags",
                    "--ignore_bad_sigmas"
                ]
                easy_run.call("phenix.cif_as_mtz %s-sf.cif %s" %
                              (id, " ".join(misc_args)))
                if os.path.isfile("%s-sf.mtz" % id):
                    os.rename("%s-sf.mtz" % id, "%s.mtz" % id)
                    print >> log, "Converted structure factors saved to %s.mtz" % id
                #  os.remove("%s-sf.cif" % id)
                files[-1] = os.path.abspath("%s.mtz" % id)
                if (not os.path.isfile("%s.mtz" % id)):
                    raise Sorry(
                        "MTZ conversion failed - try running phenix.cif_as_mtz "
                        + "manually (and check %s-sf.cif for format errors)." %
                        id)
                from iotbx.file_reader import any_file
                mtz_in = any_file("%s.mtz" % id)
                mtz_in.assert_file_type("hkl")
                for array in mtz_in.file_server.miller_arrays:
                    if (array.anomalous_flag()):
                        print >> log, "  %s is anomalous" % array.info(
                        ).label_string()
        return files
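
Called programmatically, run2 returns the downloaded path(s). A minimal usage sketch, assuming network access (and a Phenix installation for the --mtz path); the PDB ID is illustrative:

import sys

pdb_path = run2(["1ubq"], log=sys.stdout)            # single model file -> str
all_files = run2(["--mtz", "1ubq"], log=sys.stdout)  # all data plus MTZ -> list of str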
Example #38
    def __init__(self,
                 mtz_file,
                 pdb_file,
                 wilson_b=None,
                 data_label=None,
                 n_resolution_bins=20,
                 n_intensity_bins=20,
                 out=None):
        if (out is None):
            out = sys.stdout
        if (wilson_b is None) or (pdb_file is None):
            print("""\
  WARNING: missing desired Wilson B-factor and/or PDB file
           for noise profile data.  Without this information
           the intensity falloff with resolution will probably
           not be the same for your synthetic data and the
           data used to generate sigmas.
""",
                  file=out)
        self._resolution_bins = []
        from iotbx.file_reader import any_file
        from scitbx.array_family import flex
        f = any_file(mtz_file, force_type="hkl")
        f.assert_file_type("hkl")
        miller_arrays = f.file_server.miller_arrays
        f_obs = None
        i_obs = None
        for array in miller_arrays:
            if (array.info().label_string()
                    == data_label) or (data_label is None):
                if (array.is_xray_amplitude_array()) and (f_obs is None):
                    f_obs = array
                elif (array.is_xray_intensity_array()) and (i_obs is None):
                    i_obs = array
        if (i_obs is None):
            assert (f_obs is not None) and (f_obs.sigmas() is not None)
            i_obs = f_obs.f_as_f_sq()
        assert (i_obs.sigmas() is not None)
        if (wilson_b is not None) and (pdb_file is not None):
            print("  Correcting reference data intensity falloff...", file=out)
            f_obs = i_obs.f_sq_as_f()
            pdb_hierarchy = any_file(pdb_file).file_object.hierarchy
            n_residues, n_bases = get_counts(pdb_hierarchy)
            iso_scale, aniso_scale = wilson_scaling(F=f_obs,
                                                    n_residues=n_residues,
                                                    n_bases=n_bases)
            # TODO anisotropic?
            print("  Scaling statistics for unmodified reference data:",
                  file=out)
            show_b_factor_info(iso_scale, aniso_scale, out=out)
            delta_b = wilson_b - iso_scale.b_wilson
            f_obs = f_obs.apply_debye_waller_factors(b_iso=delta_b)
            i_obs = f_obs.f_as_f_sq()
        i_max = flex.max(i_obs.data())
        i_norm = i_obs.customized_copy(data=i_obs.data() / i_max,
                                       sigmas=i_obs.sigmas() / i_max)
        i_norm.setup_binner(n_bins=n_resolution_bins)
        i_over_sigma = i_obs.data() / i_obs.sigmas()
        for i_bin in i_norm.binner().range_used():
            sel = i_norm.binner().selection(i_bin)
            i_shell = i_norm.select(sel)
            sn_shell = i_over_sigma.select(sel)
            noise_bins = shell_intensity_bins(i_norm=i_shell,
                                              i_over_sigma=sn_shell,
                                              n_bins=n_intensity_bins)
            self._resolution_bins.append(noise_bins)
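
The wilson_b correction above rescales the reference amplitudes by a Debye-Waller factor. A short sketch of the attenuation this implies, assuming the usual exp(-B/(4*d**2)) form for amplitudes; delta_b and the resolutions are illustrative numbers only:

import math

delta_b = 20.0  # target Wilson B minus the B fitted from the reference data
for d in (4.0, 3.0, 2.0):  # resolution in Angstrom
    scale = math.exp(-delta_b / (4.0 * d * d))
    print("  d = %.1f A   amplitude scale = %.3f" % (d, scale))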
Example #39
 def __init__(self, params, hkl_in=None, pdb_in=None, out=sys.stdout):
     adopt_init_args(self, locals())
     self.params = params
     self.out = out
     self.pdb_hierarchy = None
     if (params.pdb_file is None) and (params.hkl_file is None):
         raise Sorry("No PDB file specified.")
     if (params.generate_noise.add_noise) and (params.hkl_file is None):
         if (params.generate_noise.noise_profile_file is None):
             raise Sorry(
                 "noise_profile_file required when add_noise=True and "
                 "hkl_file is undefined.")
     if (pdb_in is None) and (params.pdb_file is not None):
         f = file_reader.any_file(params.pdb_file, force_type="pdb")
         f.assert_file_type("pdb")
         self.pdb_in = f.file_object
     if (self.hkl_in is None) and (params.hkl_file is not None):
         f = file_reader.any_file(params.hkl_file, force_type="hkl")
         f.assert_file_type("hkl")
         self.hkl_in = f.file_object
     if (self.pdb_in is not None):
         self.pdb_hierarchy = self.pdb_in.hierarchy
     if (self.hkl_in is not None):
         make_header("Extracting experimental data", out=sys.stdout)
         f_raw, r_free = self.from_hkl()
     elif (self.pdb_in is not None):
         make_header("Generating fake data with phenix.fmodel",
                     out=sys.stdout)
         f_raw, r_free = self.from_pdb()
     if (params.r_free_flags.file_name is not None):
         f_raw, r_free = self.import_r_free_flags(f_raw)
     self.r_free = r_free
     make_header("Applying low-resolution filtering", out=sys.stdout)
     print("  Target resolution: %.2f A" % params.d_min, file=out)
     self.n_residues, self.n_bases = None, None
     if (self.pdb_in is not None):
         self.n_residues, self.n_bases = get_counts(self.pdb_hierarchy)
     #if (params.auto_adjust):
     #  if (pdb_in is None):
     #    raise Sorry("You must supply a PDB file when auto_adjust=True.")
     self.f_out = self.truncate_data(f_raw)
     if (params.generate_noise.add_noise):
         make_header("Adding noise using sigma profile", out=sys.stdout)
         if (self.f_out.sigmas() is None):
             if (self.pdb_in is not None):
                 iso_scale, aniso_scale = wilson_scaling(
                     self.f_out, self.n_residues, self.n_bases)
             i_obs = create_sigmas(f_obs=self.f_out,
                                   params=params.generate_noise,
                                   wilson_b=iso_scale.b_wilson,
                                   return_as_amplitudes=False)
         apply_sigma_noise(i_obs)
         self.f_out = i_obs.f_sq_as_f()
     make_header("Done processing", out=sys.stdout)
     print("  Completeness after processing: %.2f%%" %
           (self.f_out.completeness() * 100.),
           file=out)
     print("  Final resolution: %.2f A" % self.f_out.d_min(), file=out)
     if (self.pdb_in is not None):
         iso_scale, aniso_scale = wilson_scaling(self.f_out,
                                                 self.n_residues,
                                                 self.n_bases)
         print("", file=out)
         print("  Scaling statistics for output data:", file=out)
         show_b_factor_info(iso_scale, aniso_scale, out=out)
         print("", file=out)
     self.write_output()
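
truncate_data itself is not shown in this snippet; the resolution-cutoff part of it can be approximated with the standard miller-array filter. A sketch under that assumption only ("highres.mtz" and the 3.5 A limit are hypothetical):

from iotbx import file_reader

hkl_in = file_reader.any_file("highres.mtz", force_type="hkl")  # hypothetical file
hkl_in.assert_file_type("hkl")
f_raw = hkl_in.file_server.miller_arrays[0]
f_low = f_raw.resolution_filter(d_min=3.5)  # keep data to 3.5 A only
print("  %d of %d reflections kept" % (f_low.size(), f_raw.size()))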
Example #40
def run(args, out=None):
    if (out is None):
        out = sys.stdout
    make_header("mmtbx.simulate_low_res_data", out=out)
    print("""
  For generation of realistic data (model-based, or using real
  high-resolution data) for methods development.

*********************************** WARNING: ***********************************
 this is an experimental program - definitely NOT bug-free.
                  Use at your own risk!

  Usage:
   mmtbx.simulate_low_res_data model.pdb [options...]
     (generate data from a PDB file)

   mmtbx.simulate_low_res_data highres.mtz [model.pdb] [options...]
     (truncate high-resolution data)

   mmtbx.simulate_low_res_data --help
     (print full parameters with additional info)
""",
          file=out)
    if (len(args) == 0) or ("--help" in args):
        print("# full parameters:", file=out)
        if ("--help" in args):
            master_phil.show(attributes_level=1)
        else:
            master_phil.show()
        return
    from iotbx import file_reader
    interpreter = master_phil.command_line_argument_interpreter(
        home_scope="simulate_data")
    pdb_in = None
    pdb_hierarchy = None
    hkl_in = None
    user_phil = []
    for arg in args:
        if os.path.isfile(arg):
            f = file_reader.any_file(arg)
            if (f.file_type == "pdb"):
                pdb_in = f.file_object
                user_phil.append(
                    interpreter.process(arg="pdb_file=%s" % f.file_name))
            elif (f.file_type == "hkl"):
                hkl_in = f.file_object
                user_phil.append(
                    interpreter.process(arg="hkl_file=%s" % f.file_name))
            elif (f.file_type == "phil"):
                user_phil.append(f.file_object)
        else:
            try:
                arg_phil = interpreter.process(arg=arg)
            except RuntimeError:
                print("ignoring uninterpretable argument '%s'" % arg, file=out)
            else:
                user_phil.append(arg_phil)
    working_phil = master_phil.fetch(sources=user_phil)
    make_header("Working parameters", out=out)
    working_phil.show(prefix="  ")
    params_ = working_phil.extract()
    params = params_.simulate_data
    prepare_data(params=params, hkl_in=hkl_in, pdb_in=pdb_in, out=out)
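
A usage sketch for the command-line entry point above. The file name is hypothetical, and "d_min=4.5" assumes that d_min is a parameter in the simulate_data PHIL scope (as suggested by params.d_min in the preceding snippet):

import sys

# generate synthetic low-resolution data from a model, truncated at 4.5 A
run(["model.pdb", "d_min=4.5"], out=sys.stdout)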