def exercise():
    """Regression test for iotbx.pdb.add_conformations.

    Needs phenix_regression/pdb/1ywf.pdb; prints a message and returns if it
    is not available.  Runs the command with several parameter sets, checks
    the reported atom counts and the occupancies found in 1ywf_split.pdb, and
    finally checks that an empty selection raises Sorry("Empty selection.").
    """
    pdb_file = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/1ywf.pdb",
        test=os.path.isfile)
    if (pdb_file is None):
        print("phenix_regression not available, skipping test.")
        return
    from iotbx.command_line.pdb_add_conformations import run
    out = cStringIO.StringIO()
    run([pdb_file], out=out)
    assert contains_lines(out.getvalue(), "Modified model: 4254 atoms")
    out = cStringIO.StringIO()
    run([pdb_file, "atom_selection=\"chain A and not resname HOH\""], out=out)
    assert contains_lines(out.getvalue(), "Modified model: 3990 atoms")
    run([pdb_file, "new_occ=0.4", "atom_selection=\"resseq 1:275\""], out=out)
    from iotbx import file_reader
    pdb_in = file_reader.any_file("1ywf_split.pdb",
        force_type="pdb").file_object
    atoms = pdb_in.input.atoms()
    occ = atoms.extract_occ()
    assert (occ.count(0.6) == occ.count(0.4) == 1858)
    out = cStringIO.StringIO()
    run([pdb_file, "n_confs=3", "new_occ=0.25"], out=out)
    pdb_in = file_reader.any_file("1ywf_split.pdb",
        force_type="pdb").file_object
    # NOTE(review): the line break and the fixed-column PDB spacing inside
    # this literal were reconstructed (the reviewed copy of the source had its
    # whitespace collapsed) - confirm against the real program output.
    assert contains_lines(out.getvalue(), """\
WARNING: zero-occupancy atom:
HETATM 1940  O  AHOH A 354      -0.009  56.525  -3.872  0.25 29.17           O\
""")
    atoms = pdb_in.input.atoms()
    assert (atoms.size() == 6381)
    occ = atoms.extract_occ()
    assert (occ.count(0.5) == 2126) and (occ.count(0.25) == 4254)
    try:
        run([pdb_file, "atom_selection=\"chain G\""], out=out)
    except Sorry as e:
        assert (str(e) == "Empty selection.")
    else:
        # Without this branch the test passed silently when nothing was raised.
        raise AssertionError("Sorry expected for an empty atom selection")
def exercise_twin_detwin():
    """Twin a random data set with massage_data, then detwin it again.

    Writes tmp_massage_in.mtz, twins it into MTZ and Scalepack outputs,
    detwins the twinned MTZ file and compares the result with the input
    amplitudes.
    """
    random.seed(12345)
    flex.set_random_seed(12345)
    xrs = random_structure.xray_structure(
        unit_cell=(12,5,12,90,90,90),
        space_group_symbol="P1",
        n_scatterers=12,
        elements="random")
    fc = abs(xrs.structure_factors(d_min=1.5).f_calc())
    fc = fc.set_observation_type_xray_amplitude()
    mtz_file = "tmp_massage_in.mtz"
    fc.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)

    def massage(input_file, action, output_file):
        # The three runs differ only in input file, symmetry action and output.
        massage_data.run(
            args=[
                input_file,
                "aniso.action=None",
                "outlier.action=None",
                "symmetry.action=" + action,
                "twin_law='l,-k,h'",
                "fraction=0.3",
                "hklout=" + output_file,
            ],
            out=null_out())

    def amplitudes_and_reference(file_name):
        # First array of the file as amplitudes, paired with the common
        # subset of the reference data.
        hkl_in = file_reader.any_file(file_name)
        amplitudes = hkl_in.file_server.miller_arrays[0].f_sq_as_f()
        return amplitudes.common_sets(other=fc)

    massage(mtz_file, "twin", "tmp_massage_twinned.mtz")
    assert op.isfile("tmp_massage_twinned.mtz")
    fc_twin, fc_ref = amplitudes_and_reference("tmp_massage_twinned.mtz")
    for hkl, f_ref, f_twinned in zip(
            fc_ref.indices(), fc_ref.data(), fc_twin.data()):
        if (abs(hkl[0]) != abs(hkl[2])):
            assert not approx_equal(f_ref, f_twinned, eps=0.01,
                out=null_out()), (hkl, f_ref, f_twinned)

    massage(mtz_file, "twin", "tmp_massage_twinned.sca")
    assert op.isfile("tmp_massage_twinned.sca")

    massage("tmp_massage_twinned.mtz", "detwin", "tmp_massage_detwinned.mtz")
    fc_detwin, fc_ref = amplitudes_and_reference("tmp_massage_detwinned.mtz")
    # XXX we appear to lose some accuracy here, possibly due to the use of
    # MTZ format
    for hkl, f_ref, f_detwinned in zip(
            fc_ref.indices(), fc_ref.data(), fc_detwin.data()):
        assert approx_equal(f_ref, f_detwinned, eps=0.01), hkl
def group_chains_and_sequences(seq_file, pdb_file, **kwds):
    """Group the chain IDs of a model by the sequence they were matched to.

    Parameters
    ----------
    seq_file : path of a file that any_file must recognize as type "seq"
    pdb_file : path of a file that any_file must recognize as type "pdb"
    **kwds : passed through to validation(); 'pdb_hierarchy' and 'sequences'
      are overwritten with the objects read from the two files.

    Returns
    -------
    dict mapping each matched sequence (taken from the sequence file) to the
    list of chain IDs assigned to it.

    Raises
    ------
    Sorry if a file has the wrong type, a chain has no matching sequence, or
    one chain ID is matched to two different sequences.
    """
    from iotbx import file_reader
    seq_in = file_reader.any_file(seq_file,
        raise_sorry_if_errors=True,
        raise_sorry_if_not_expected_format=True)
    if (seq_in.file_type != "seq"):
        # The message used to be raised with its %s placeholder unfilled.
        raise Sorry("Can't parse %s as a sequence file." % seq_file)
    pdb_in = file_reader.any_file(pdb_file,
        raise_sorry_if_errors=True,
        raise_sorry_if_not_expected_format=True)
    if (pdb_in.file_type != "pdb"):
        raise Sorry("Can't parse %s as a PDB or mmCIF file." % pdb_file)
    kwds['pdb_hierarchy'] = pdb_in.file_object.hierarchy
    kwds['sequences'] = seq_in.file_object
    v = validation(**kwds)
    chain_to_sequence_mappings = {}
    sequence_to_chain_mappings = {}
    for chain in v.chains:
        seq_id = chain.sequence_id
        chain_id = chain.chain_id
        if (seq_id is None):
            raise Sorry("Can't map chain %s to a sequence in %s." %
                (chain_id, seq_file))
        sequence = seq_in.file_object[seq_id].sequence
        known = chain_to_sequence_mappings.setdefault(chain_id, sequence)
        if (known != sequence):
            raise Sorry("Multiple unique chains named '%s'" % chain_id)
        # The dict is keyed by the sequence-file sequence.  The membership
        # test used chain.sequence instead, which reset the list (or raised
        # KeyError) whenever the two strings differed.
        sequence_to_chain_mappings.setdefault(sequence, []).append(chain_id)
    return sequence_to_chain_mappings
def exercise_alignment():
    """Check that any_file recognizes two alignment formats as type "aln".

    Writes a FASTA-style alignment and then a MUSCLE/ClustalW-style alignment
    to seqs.aln and verifies the file type and the sequence names.
    """
    aln1 = """\
>1mru_A
-----------------GSHMTTPSHLSD-----RYELGEILGFGGMSEVHLARDLRLHR
DVAVKVLRADLARDPSFYLRFRREAQNAAALNHPAIVAVYDTGEAETPAGPLPYIVMEYV
DGVTLRDIVHTEGPMTPKRAIEVIADACQALNFSHQNGIIHRDVKPANIMISATNAVKVM
DFGIARAIADSGNSVTQTAAVIGTAQYLSPEQARGDSVDARSDVYSLGCVLYEVLTGEPP
FTGDSPVSVAYQHVREDPIPPSARHEGLSADLDAVVLKALAKNPENRYQTAAEMRADLVR
VHNGEPPEAPKVLTDAERTSLLSSAAGNLSGPR
>2h34_A
MGSSHHHHHHSSGLVPRGSHMDGTAESREGTQFGPYRLRRLVGRGGMGDVYEAEDTVRER
IVALKLMSETLSSDPVFRTRMQREARTAGRLQEPHVVPIHDFGEID---GQL-YVDMRLI
NGVDLAAMLRRQGPLAPPRAVAIVRQIGSALDAAHAAGATHRDVKPENILVSADDFAYLV
DFGIASATTD--EKLTQLGNTVGTLYYMAPERFSESHATYRADIYALTCVLYECLTGSPP
YQGDQ-LSVMGAHINQAIPRPSTVRPGIPVAFDAVIARGMAKNPEDRYVTCGDLSA----
-----AAHAALATADQDRATDILR--------R"""
    # The with-block closes the file before any_file reads it back.
    with open("seqs.aln", "w") as f:
        f.write(aln1)
    f = any_file("seqs.aln")
    f.assert_file_type("aln")
    assert (f.file_object.names == ["1mru_A", "2h34_A"])
    # NOTE(review): the blank lines, the name-column padding and the spacing
    # of the conservation lines below were reconstructed (the reviewed copy of
    # the source had its whitespace collapsed) - confirm against the original
    # literal before merging.
    aln2 = """\
MUSCLE (3.8) multiple sequence alignment


1mru_A          -----------------GSHMTTPSHLSD-----RYELGEILGFGGMSEVHLARDLRLHR
2h34_A          MGSSHHHHHHSSGLVPRGSHMDGTAESREGTQFGPYRLRRLVGRGGMGDVYEAEDTVRER
                **** .: : * * ::* ***.:*: * * *

1mru_A          DVAVKVLRADLARDPSFYLRFRREAQNAAALNHPAIVAVYDTGEAETPAGPLPYIVMEYV
2h34_A          IVALKLMSETLSSDPVFRTRMQREARTAGRLQEPHVVPIHDFGEID---GQL-YVDMRLI
                **:*:: *: ** * *:.***..*. *: * :*.::* ** : * * *: * :"""
    with open("seqs.aln", "w") as f:
        f.write(aln2)
    f = any_file("seqs.aln")
    f.assert_file_type("aln")
    assert (f.file_object.names == ["1mru_A", "2h34_A"])
def exercise():
    """Test mmtbx.apply_ncs_to_ligand and mmtbx.ncs.ligands.

    Removes the ACT ligand from chain B of the test model, lets the
    command-line program rebuild it, and compares the rebuilt copy with the
    removed one.  Then checks find_ncs_operators on the edited hierarchy.
    """
    #
    # Test command-line program
    #
    pdb_in, mtz_in = make_inputs()
    pdb_file = file_reader.any_file(pdb_in, force_type="pdb")
    hierarchy = pdb_file.file_object.hierarchy
    old_ligand = None
    for chain in hierarchy.only_model().chains():
        if (chain.id != "B"):
            continue
        for residue_group in chain.residue_groups():
            atom_group = residue_group.only_atom_group()
            if (atom_group.resname == "ACT"):
                old_ligand = atom_group.detached_copy()
                residue_group.remove_atom_group(atom_group)
                break
    assert old_ligand is not None
    # Close the file explicitly before the program below reads it.
    with open("tst_ligand_ncs_start.pdb", "w") as f:
        f.write(hierarchy.as_pdb_string(
            crystal_symmetry=pdb_file.file_object.crystal_symmetry()))
    args = [
        "tst_ligand_ncs_start.pdb",
        mtz_in,
        "ligand_code=ACT",
    ]
    from mmtbx.command_line import apply_ncs_to_ligand
    # Remove stale output so the isfile assertion below is meaningful.
    if op.isfile("ncs_ligands.pdb"):
        os.remove("ncs_ligands.pdb")
    result = apply_ncs_to_ligand.run(args=args, out=null_out())
    assert result.n_ligands_new == 1
    assert op.isfile("ncs_ligands.pdb")
    pdb_out = file_reader.any_file("ncs_ligands.pdb", force_type="pdb")
    hierarchy_new = pdb_out.file_object.hierarchy
    new_ligand = None
    for chain in hierarchy_new.only_model().chains():
        if (chain.id != "B"):
            continue
        for residue_group in chain.residue_groups():
            atom_group = residue_group.only_atom_group()
            if (atom_group.resname == "ACT"):
                new_ligand = atom_group.detached_copy()
    assert new_ligand is not None
    rmsd = old_ligand.atoms().extract_xyz().rms_difference(
        new_ligand.atoms().extract_xyz())
    assert (rmsd < 0.5)
    #
    # Unit tests
    #
    import mmtbx.ncs.ligands
    operators = mmtbx.ncs.ligands.find_ncs_operators(hierarchy,
        log=null_out())
    assert len(operators) == 1
    group_ops = operators[0]
    assert len(group_ops) == 2
    assert (len(group_ops[0].selection) == 7)
    for g_op in group_ops:
        out = StringIO()
        g_op.show_summary(out=out, prefix=" ")
        assert out.getvalue().count("Rotation:") == 1
def run_post_refinement(
        pdb_file,
        map_coeffs_file,
        output_file=None,
        params=None,
        f_map_label="2FOFCWT",
        diff_map_label="FOFCWT",
        model_map_label="F-model",
        write_model=True,
        out=None):
    """Prune a model against map coefficients read from a reflection file.

    Parameters
    ----------
    pdb_file : model file name (read with force_type="pdb")
    map_coeffs_file : reflection file holding the three map coefficient arrays
    output_file : output model name; defaults to "<pdb basename>_pruned.pdb"
      in the current directory when write_model is true
    params : pruning parameters; defaults to the `prune` scope extracted from
      get_master_phil()
    f_map_label, diff_map_label, model_map_label : first column label of the
      2mFo-DFc, mFo-DFc and Fc arrays (the Fc label is also accepted with a
      "(+)" suffix, in which case Bijvoet mates are averaged)
    write_model : write the pruned hierarchy to output_file
    out : log stream, sys.stdout if None

    Returns
    -------
    The object returned by prune_model(...).process_residues(); when the
    model is written its `output_file` attribute is set.

    Raises
    ------
    RuntimeError if one of the three map arrays is not found.
    """
    if (out is None):
        out = sys.stdout
    if (params is None):
        params = get_master_phil().fetch().extract().prune
    from iotbx import file_reader
    pdb_in = file_reader.any_file(pdb_file, force_type="pdb")
    pdb_in.assert_file_type("pdb")
    pdb_hierarchy = pdb_in.file_object.hierarchy
    pdb_hierarchy.atoms().reset_i_seq()
    # XXX this probably shouldn't be necessary
    pdb_hierarchy.atoms().set_chemical_element_simple_if_necessary()
    mtz_in = file_reader.any_file(map_coeffs_file, force_type="hkl")
    mtz_in.assert_file_type("hkl")
    f_map_coeffs = diff_map_coeffs = model_map_coeffs = None
    for array in mtz_in.file_server.miller_arrays:
        labels = array.info().labels
        if (labels[0] == f_map_label):
            f_map_coeffs = array
        elif (labels[0] == diff_map_label):
            diff_map_coeffs = array
        elif (labels[0] in [model_map_label, model_map_label + "(+)"]):
            model_map_coeffs = array.average_bijvoet_mates()
    if (f_map_coeffs is None):
        raise RuntimeError("2mFo-DFc map not found (expected labels %s)." %
            f_map_label)
    elif (diff_map_coeffs is None):
        raise RuntimeError("mFo-DFc map not found (expected labels %s)." %
            diff_map_label)
    elif (model_map_coeffs is None):
        raise RuntimeError("Fc map not found (expected labels %s)." %
            model_map_label)
    result = prune_model(
        f_map_coeffs=f_map_coeffs,
        diff_map_coeffs=diff_map_coeffs,
        model_map_coeffs=model_map_coeffs,
        pdb_hierarchy=pdb_hierarchy,
        params=params).process_residues(out=out)
    if (write_model):
        if (output_file is None):
            base_name = os.path.basename(pdb_file)
            output_file = os.path.splitext(base_name)[0] + "_pruned.pdb"
        # The with-block closes the file even if one of the writes raises.
        with open(output_file, "w") as f:
            f.write("%s\n" % "\n".join(
                pdb_in.file_object.input.crystallographic_section()))
            f.write(pdb_hierarchy.as_pdb_string())
        # NOTE(review): assumed to belong to the write_model branch (the
        # nesting was not recoverable from the reviewed source) - confirm.
        result.output_file = output_file
    return result
def run(args, out=sys.stdout):
    """Collect the inputs for phenix.french_wilson (visible part only).

    Each argument that is an existing file is classified with any_file: a
    reflection file becomes the data source, a phil file a parameter source.
    Any other argument is parsed as a command-line parameter.  Intensities
    and (if available) R-free flags are then requested from the reflection
    file server.

    Raises Usage when no reflection file is given, Sorry when crystal
    symmetry or unit cell are missing.
    """
    from cctbx import french_wilson
    from iotbx import file_reader
    hkl_file = None
    sources = []
    interpreter = master_phil.command_line_argument_interpreter()
    for arg in args:
        if not os.path.isfile(arg):
            sources.append(interpreter.process(arg=arg))
            continue
        input_file = file_reader.any_file(arg)
        file_type = input_file.file_type
        if (file_type == "hkl"):
            hkl_file = input_file
            sources.append(interpreter.process(arg="file_name=\"%s\"" % arg))
        elif (file_type == "phil"):
            sources.append(input_file.file_object)
    work_params = master_phil.fetch(sources=sources).extract()
    params = work_params.french_wilson
    if (hkl_file is None):
        if (params.file_name is None):
            raise Usage(
                "phenix.french_wilson data.mtz [params.eff] [options ...]")
        hkl_file = file_reader.any_file(params.file_name)
    elif (params.file_name is None):
        params.file_name = hkl_file.file_name
    xray_data_server = hkl_file.file_server
    first_array = xray_data_server.miller_arrays[0]
    crystal_symmetry = first_array.crystal_symmetry()
    if (crystal_symmetry is None):
        raise Sorry("No crystal symmetry found.  This program requires an input "+
            "format with complete symmetry information.")
    unit_cell = xray_data_server.miller_arrays[0].unit_cell()
    if (unit_cell is None):
        raise Sorry("No unit cell found.  This program requires an input "+
            "format with complete unit cell information.")
    i_obs = xray_data_server.get_xray_data(
        file_name=params.file_name,
        labels=params.intensity_labels,
        ignore_all_zeros=True,
        parameter_scope='french_wilson',
        parameter_name='intensity_labels')
    import cStringIO
    # Replace the server's err stream with a buffer before asking for flags.
    xray_data_server.err = cStringIO.StringIO()
    try:
        r_free_flags, test_flag_value = xray_data_server.get_r_free_flags(
            file_name=params.file_name,
            label=params.r_free_flags.label,
            test_flag_value=None,
            disable_suitability_test=False,
            parameter_scope="french_wilson.r_free_flags")
    except Sorry as e:
        # R-free flags are optional here.
        r_free_flags = None
def check_files(phil_scope, file_type, error_message):
    """Verify that the given file(s) are detected as `file_type`.

    `phil_scope` may be None (nothing is checked), a single file name, or a
    list of file names.  Raises Sorry(error_message) for the first file whose
    type detected by file_reader.any_file differs from `file_type`.
    """
    if phil_scope is None:
        return
    if isinstance(phil_scope, list):
        file_names = phil_scope
    else:
        file_names = [phil_scope]
    for file_name in file_names:
        detected = file_reader.any_file(file_name).file_type
        if detected != file_type:
            raise Sorry(error_message)
def exercise_maps():
    """Check that any_file detects X-PLOR and CCP4 map files.

    The X-PLOR check is skipped when phenix_regression/misc/cns.map is not
    found; the CCP4 map is taken from the iotbx distribution.
    """
    xplor_map = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/misc/cns.map",
        test=os.path.isfile)
    if xplor_map is not None:
        xplor_in = any_file(xplor_map)
        assert xplor_in.file_type == "xplor_map"
    ccp4_map = libtbx.env.under_dist(
        module_name="iotbx",
        path="ccp4_map/tst_input.map")
    ccp4_in = any_file(ccp4_map)
    assert ccp4_in.file_type == "ccp4_map"
def exercise_1():
    """Real-space correlation test on 1yjp (needs phenix_regression).

    Returns False when the input files are not found, True after all
    assertions have passed.
    """
    def find(relative_path):
        return libtbx.env.find_in_repositories(
            relative_path=relative_path,
            test=os.path.isfile)

    pdb_file = find("phenix_regression/pdb/1yjp_h.pdb")
    mtz_file = find("phenix_regression/reflection_files/1yjp.mtz")
    if (None in [pdb_file, mtz_file]):
        print("phenix_regression not found, skipping test")
        return False
    pdb_in = file_reader.any_file(pdb_file)
    hierarchy = pdb_in.file_object.hierarchy
    hierarchy.atoms().reset_i_seq()
    xrs = pdb_in.file_object.xray_structure_simple()
    arrays = file_reader.any_file(mtz_file).file_server.miller_arrays
    f_obs = arrays[0]
    r_free = arrays[1]
    # Convert the flag values into a boolean test-set selection.
    r_free = r_free.customized_copy(data=(r_free.data()==1))
    fmodel = mmtbx.utils.fmodel_simple(
        f_obs=f_obs,
        r_free_flags=r_free,
        xray_structures=[xrs],
        scattering_table="n_gaussian")
    map_stats = real_space_correlation.map_statistics_for_fragment(
        fragment=hierarchy,
        fmodel=fmodel)
    assert approx_equal(map_stats.cc, 0.960, eps=0.01)
    edm = fmodel.electron_density_map()

    def sigma_scaled_map(map_type):
        coeffs = edm.map_coefficients(map_type)
        fft_map = coeffs.fft_map(resolution_factor=0.25)
        return fft_map.apply_sigma_scaling().real_map()

    map1 = sigma_scaled_map("2mFo-DFc")
    map2 = sigma_scaled_map("Fmodel")
    xray_structure = fmodel.xray_structure
    all_atoms = flex.bool(xrs.sites_cart().size(), True)
    map_stats2 = real_space_correlation.map_statistics_for_atom_selection(
        atom_selection=all_atoms,
        map1=map1,
        map2=map2,
        xray_structure=xrs)
    assert approx_equal(map_stats2.cc, map_stats.cc, 0.01)
    # XXX other code outside cctbx depends on the current API - do not simply
    # change the test if this breaks!
    results = real_space_correlation.simple(
        fmodel=fmodel,
        pdb_hierarchy=hierarchy,
        log=null_out())
    assert isinstance(results, list)
    first = results[0]
    assert isinstance(first, group_args)
    assert (first.n_atoms == 1)
    assert (first.id_str == " A GLY 1 N ")
    return True
def get_sequence_n_copies_from_files(seq_file, pdb_file, **kwds):
    """Read a sequence file and a model file, then call get_sequence_n_copies.

    Parameters
    ----------
    seq_file : path of a file that any_file must recognize as type "seq"
    pdb_file : path of a file that any_file must recognize as type "pdb"
    **kwds : passed through to get_sequence_n_copies(); 'pdb_hierarchy' and
      'sequences' are overwritten with the objects read from the two files.

    Returns
    -------
    Whatever get_sequence_n_copies(**kwds) returns.

    Raises
    ------
    Sorry if either file is not of the expected type.
    """
    from iotbx import file_reader
    seq_in = file_reader.any_file(seq_file,
        raise_sorry_if_errors=True,
        raise_sorry_if_not_expected_format=True)
    if (seq_in.file_type != "seq"):
        # The message used to be raised with its %s placeholder unfilled.
        raise Sorry("Can't parse %s as a sequence file." % seq_file)
    pdb_in = file_reader.any_file(pdb_file,
        raise_sorry_if_errors=True,
        raise_sorry_if_not_expected_format=True)
    if (pdb_in.file_type != "pdb"):
        raise Sorry("Can't parse %s as a PDB or mmCIF file." % pdb_file)
    kwds['pdb_hierarchy'] = pdb_in.file_object.hierarchy
    kwds['sequences'] = seq_in.file_object
    return get_sequence_n_copies(**kwds)
def run(args=(), params=None, out=sys.stdout):
    """Run the PDB remediator from command-line arguments or extracted params.

    Parameters
    ----------
    args : command-line arguments; existing files detected as "pdb" set the
      input file, non-file arguments are parsed as phil parameters
    params : already extracted parameters (used by the Phenix GUI); when
      given, `args` is ignored
    out : unused in this function

    Returns
    -------
    The output file name (remediator.output_file).

    Raises
    ------
    Usage if no input file name is available.
    """
    from iotbx.pdb.remediation import remediator
    from iotbx import file_reader
    # Defined up front so the check below also works when params is passed in.
    pdb_file = None
    if (params is None):
        interpreter = master_phil.command_line_argument_interpreter()
        sources = []
        for arg in args:
            if os.path.isfile(arg):
                input_file = file_reader.any_file(arg)
                if (input_file.file_type == "pdb"):
                    pdb_file = input_file
                    sources.append(
                        interpreter.process(arg="file_name=\"%s\"" % arg))
            else:
                arg_phil = interpreter.process(arg=arg)
                sources.append(arg_phil)
        work_phil = master_phil.fetch(sources=sources)
        work_params = work_phil.extract()
    else:  # XXX for phenix GUI
        work_params = params
    # Resolve the input file name first: the default output name is derived
    # from it, and os.path.splitext(None) would fail before Usage is raised.
    if (work_params.remediator.file_name is None):
        if (pdb_file is None):
            summary = remediator.get_summary()
            raise Usage(summary)
        work_params.remediator.file_name = pdb_file.file_name
    if (work_params.remediator.output_file is None):
        base, ext = os.path.splitext(work_params.remediator.file_name)
        work_params.remediator.output_file = base + "_remediated.pdb"
    params = work_params.remediator
    remediator.remediator(params)
    return work_params.remediator.output_file
def generate_magnessium_inputs(file_base="mg_frag", anonymize=True):
    """
    Write a fake model containing magnesium ions plus matching reflection data.

    Parameters
    ----------
    file_base : str, optional
        Prefix of the files that are written.
    anonymize : bool, optional
        If true, the model is passed through anonymize_ions and written to
        "<file_base>_hoh.pdb", which is then the model file returned.

    Returns
    -------
    mtz_path : str
        Absolute path of the reflection file.
    pdb_path : str
        Absolute path of the model file.
    """
    model_path = write_pdb_input_magnessium_binding(file_base=file_base)
    data_path = generate_mtz_file(
        file_base=file_base,
        d_min=1.5)
    assert os.path.isfile(model_path) and os.path.isfile(data_path)
    if anonymize:
        model_in = any_file(model_path)
        symmetry = model_in.file_object.crystal_symmetry
        anonymized, n = anonymize_ions(model_in.file_object.hierarchy,
            log=null_out())
        model_path = file_base + "_hoh.pdb"
        anonymized.write_pdb_file(
            file_name=model_path,
            crystal_symmetry=symmetry())
        assert os.path.isfile(model_path)
    return os.path.abspath(data_path), os.path.abspath(model_path)
def run(args, out=sys.stdout):
    """Print a validation summary for one model file and return it.

    Raises Usage when called without arguments or with --help, and Sorry
    when the first positional argument is not an existing file.  A model with
    a single xray structure is summarized with summary(); otherwise
    ensemble() is used and the model count is added to the heading.
    """
    import optparse
    if (len(args) == 0) or ("--help" in args):
        # NOTE(review): the line breaks of this message were reconstructed
        # (the reviewed copy of the source had its whitespace collapsed).
        raise Usage("""
mmtbx.validation_summary model.pdb

Prints a brief summary of validation criteria, including Ramachandran
statistics, rotamer outliers, clashscore, C-beta deviations, plus R-factors
and RMS(bonds)/RMS(angles) if found in PDB header. (This is primarily used
for evaluating the output of refinement tests; general users are advised to
run phenix.model_vs_data or the validation GUI.)
""")
    parser = optparse.OptionParser()
    options, args = parser.parse_args(args)
    pdb_file = args[0]
    if (not os.path.isfile(pdb_file)):
        raise Sorry("Not a file: %s" % pdb_file)
    from iotbx.file_reader import any_file
    pdb_in = any_file(pdb_file, force_type="pdb").check_file_type("pdb")
    hierarchy = pdb_in.file_object.hierarchy
    n_models = len(pdb_in.file_object.input.xray_structures_simple())
    if (n_models == 1):
        s = summary(pdb_file=pdb_file)
        extra = ""
    else:
        s = ensemble(pdb_hierarchy=hierarchy, n_models=n_models)
        extra = " (%d models)" % n_models
    print >> out, ""
    print >> out, "Validation summary for %s%s:" % (pdb_file, extra)
    s.show(out=out, prefix=" ", show_percentiles=True)
    print >> out, ""
    return s
def exercise_heavy():
    """Test mmtbx.validate_waters on a fake Cd/Cl anomalous data set.

    The ions are anonymized before validation; the result object must
    survive a pickle round trip, flag at least one bad water and report two
    heavy atoms.
    """
    from mmtbx.regression import make_fake_anomalous_data
    from mmtbx.command_line import validate_waters
    import mmtbx.ions.utils
    from iotbx.file_reader import any_file
    file_base = "tst_validate_waters_1"
    pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(
        file_base=file_base)
    scatterers = [
        group_args(selection="element CD", fp=-0.29, fdp=2.676),
        group_args(selection="element CL", fp=0.256, fdp=0.5),
    ]
    mtz_file = make_fake_anomalous_data.generate_mtz_file(
        file_base=file_base,
        d_min=1.5,
        anomalous_scatterers=scatterers)
    pdb_in = any_file(pdb_file)
    hierarchy, n = mmtbx.ions.utils.anonymize_ions(
        pdb_in.file_object.hierarchy, log=null_out())
    hierarchy.write_pdb_file("%s_start.pdb" % file_base,
        crystal_symmetry=pdb_in.file_object.crystal_symmetry())
    results = validate_waters.run(
        args=[
            "%s_start.pdb" % file_base,
            "%s.mtz" % file_base,
            "skip_twin_detection=True",
        ],
        out=null_out())
    shown = StringIO()
    results.show(out=shown)
    # The result object must print identically after a pickle round trip.
    restored = easy_pickle.loads(easy_pickle.dumps(results))
    shown_restored = StringIO()
    restored.show(out=shown_restored)
    assert not show_diff(shown.getvalue(), shown_restored.getvalue())
    assert (results.n_bad >= 1) and (results.n_heavy == 2)
def extract_phenix_refine_map_coeffs(mtz_file, limit_arrays=None):
    """Collect the complex (map coefficient) arrays of a reflection file.

    Arrays whose label string starts with "F-model" are skipped, as are
    arrays not listed in `limit_arrays` when that list is given.

    Returns a list of (miller_array, map_name) tuples; map_name is the
    conventional name for known first labels (e.g. "2FOFCWT" ->
    "2mFo-DFc"), otherwise the first label itself.

    Raises Sorry if `mtz_file` does not exist.
    """
    assert (limit_arrays is None) or (isinstance(limit_arrays, list))
    if not os.path.isfile(mtz_file):
        raise Sorry("No map coefficients are available for conversion.")
    mtz_in = file_reader.any_file(mtz_file)
    mtz_in.assert_file_type("hkl")
    miller_arrays = mtz_in.file_server.miller_arrays
    assert len(miller_arrays) > 0
    map_names = {
        "2FOFCWT" : "2mFo-DFc",
        "FOFCWT" : "mFo-DFc",
        "2FOFCWT_no_fill" : "2mFo-DFc_no_fill",
        "FOFCWT_no_fill" : "mFo-DFc_no_fill",
    }
    output_arrays = []
    for miller_array in miller_arrays:
        if not miller_array.is_complex_array():
            continue
        label_string = miller_array.info().label_string()
        if label_string.startswith("F-model"):
            continue
        if (limit_arrays is not None) and (label_string not in limit_arrays):
            continue
        f_label = miller_array.info().labels[0]
        output_arrays.append((miller_array, map_names.get(f_label, f_label)))
    return output_arrays
def import_r_free_flags(self, F):
    """Read R-free flags from params.r_free_flags.file_name and match them to F.

    Flags are converted to booleans (value == test flag value), mapped to the
    asymmetric unit and reduced to the reflections common with F.  When F has
    reflections without flags, params.missing_flags decides: "discard" drops
    those reflections from F, anything else generates flags for them using
    the free fraction of the imported set.

    Returns the tuple (F, r_free) with arrays of equal size.
    """
    params = self.params.r_free_flags
    out = self.out
    from iotbx import file_reader
    rfree_in = file_reader.any_file(params.file_name)
    rfree_in.assert_file_type("hkl")
    hkl_server = rfree_in.file_server
    r_free_raw, flag_value = hkl_server.get_r_free_flags(
        file_name=None,
        label=params.label,
        test_flag_value=None,
        parameter_scope="simulate_data.r_free_flags",
        disable_suitability_test=False)
    r_free = r_free_raw.customized_copy(data=r_free_raw.data() == flag_value)
    r_free = r_free.map_to_asu().common_set(F)
    print >> out, " Using R-free flags from %s:%s" % (rfree_in.file_name,
        r_free_raw.info().label_string())
    if (F.data().size() != r_free.data().size()):
        n_missing = F.data().size() - r_free.data().size()
        assert (n_missing > 0)
        if (params.missing_flags == "discard"):
            print >> out, " discarding %d amplitudes without R-free flags" % \
                n_missing
            F = F.common_set(r_free)
        else:
            print >> out, " generating missing R-free flags for %d reflections" %\
                n_missing
            missing_set = F.lone_set(r_free)
            # Both operands are integer counts; force float division so the
            # fraction is not truncated to 0 when the module does not enable
            # true division (that import is not visible from here).
            fraction_free = float(r_free.data().count(True)) / \
                r_free.data().size()
            missing_flags = missing_set.generate_r_free_flags(
                fraction=fraction_free,
                max_free=None,
                use_lattice_symmetry=True)
            r_free = r_free.concatenate(other=missing_flags)
    assert (F.data().size() == r_free.data().size())
    return F, r_free
def exercise_cns_input():
    """Check that load_model_and_data accepts CNS-format data without symmetry.

    The generated MTZ data are exported as CNS hkl, the lines containing "{"
    (embedded symmetry) are dropped, and the crystal symmetry reported by the
    loader must then be the one shown in the expected text below.
    """
    from mmtbx.regression import make_fake_anomalous_data
    pdb_file, mtz_file = make_fake_anomalous_data.generate_cd_cl_inputs(
        file_base="tst_cmdline_cns")
    from iotbx.file_reader import any_file
    mtz_in = any_file("tst_cmdline_cns.mtz")
    f_obs = mtz_in.file_server.miller_arrays[0].average_bijvoet_mates()
    flags = mtz_in.file_server.miller_arrays[1].average_bijvoet_mates()
    out = StringIO()
    f_obs.export_as_cns_hkl(
        file_object=out,
        r_free_flags=flags)
    # The with-block closes the file even if a write fails.
    with open("tst_cmdline_cns.hkl", "w") as f:
        # get rid of embedded symmetry
        for line in out.getvalue().splitlines():
            if ("{" not in line):
                f.write("%s\n" % line)
    cmdline = mmtbx.command_line.load_model_and_data(
        args=["tst_cmdline_cns.pdb", "tst_cmdline_cns.hkl"],
        master_phil=mmtbx.command_line.generic_simple_input_phil(),
        process_pdb_file=False,
        create_fmodel=True,
        out=null_out())
    out = StringIO()
    cmdline.crystal_symmetry.show_summary(f=out)
    # NOTE(review): line breaks of the expected text were reconstructed (the
    # reviewed copy of the source had its whitespace collapsed) - confirm.
    assert (out.getvalue() == """\
Unit cell: (21.362, 23.436, 23.594, 90, 90, 90)
Space group: P 1 (No. 1)
"""), out.getvalue()
def exercise():
    """Test mmtbx.validation_summary on a single model and on an ensemble.

    Skipped (with a message) when the reduce, probe or phenix_regression
    module is missing.  The single-model summary must survive a pickle round
    trip; a five-model copy of the same structure must give an "ensemble".
    """
    for module in ["reduce", "probe", "phenix_regression"]:
        if not libtbx.env.has_module(module):
            print("%s not available, skipping" % module)
            return
    from mmtbx.command_line import validation_summary
    from iotbx import file_reader
    import iotbx.pdb.hierarchy
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent",
        test=os.path.isfile)
    out = StringIO()
    summary = validation_summary.run(args=[regression_pdb], out=out)
    assert approx_equal(summary.clashscore, 13.597, eps=0.0001)
    ss = easy_pickle.dumps(summary)
    sss = easy_pickle.loads(ss)
    out_1 = StringIO()
    out_2 = StringIO()
    summary.show(out=out_1)
    sss.show(out=out_2)
    assert out_1.getvalue() == out_2.getvalue()
    pdb_in = file_reader.any_file(regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    new_hierarchy = iotbx.pdb.hierarchy.root()
    for i in range(5):
        model = hierarchy.only_model().detached_copy()
        model.id = str(i + 1)
        new_hierarchy.append_model(model)
    # Close the file explicitly before validation_summary reads it back.
    with open("tst_validation_summary.pdb", "w") as f:
        f.write(new_hierarchy.as_pdb_string())
    out2 = StringIO()
    summary = validation_summary.run(args=["tst_validation_summary.pdb"],
        out=out2)
    assert type(summary).__name__ == "ensemble"
    print("OK")
def get_rotamers(file_name):
    """Return a list of (id_str, rotamer_name) for every rotalyze result.

    The model is read with file_reader.any_file and analyzed with
    rotalyze.rotalyze (data_version "8000", outliers_only=False).
    """
    hierarchy = file_reader.any_file(file_name).file_object.hierarchy
    validate = rotalyze.rotalyze(
        pdb_hierarchy=hierarchy,
        data_version="8000",
        outliers_only=False)
    rotamers = []
    for result in validate.results:
        rotamers.append((result.id_str(), result.rotamer_name))
    return rotamers
def validate_params(params):
    """Check that params.input.map_1 and map_2 are two comparable CCP4 maps.

    Returns True when both files are CCP4 maps with similar crystal symmetry
    and identical grid accessors (all, focus, origin); raises Sorry otherwise.
    """
    map_1 = params.input.map_1
    map_2 = params.input.map_2
    if (map_1 is None) or (map_2 is None):
        raise Sorry('Two CCP4-formatted maps are required.')

    # check files
    maps = []
    for file_name in (map_1, map_2):
        map_in = file_reader.any_file(file_name)
        if (map_in.file_type != 'ccp4_map'):
            raise Sorry('Please input a CCP4-formatted map for %s.' % file_name)
        maps.append(map_in)

    # check symmetry
    m1 = maps[0].file_object
    m2 = maps[1].file_object
    cs1 = crystal.symmetry(m1.unit_cell().parameters(), m1.space_group_number)
    cs2 = crystal.symmetry(m2.unit_cell().parameters(), m2.space_group_number)
    if (cs1.is_similar_symmetry(cs2) is False):
        raise Sorry('The symmetry of the two maps is not similar.')

    # check maps
    data_1 = m1.map_data()
    data_2 = m2.map_data()
    grids_differ = (
        (data_1.accessor().all() != data_2.accessor().all()) or
        (data_1.accessor().focus() != data_2.accessor().focus()) or
        (data_1.accessor().origin() != data_2.accessor().origin()))
    if grids_differ:
        raise Sorry('The two maps are not similar.')
    return True
def __init__(self,pdb_file,hklmtz_file,
             detail,high_resolution=None,mdb_document=None,pdb_code=None,
             do_flips=False) :
    """Store the inputs, read the model hierarchy and load model and data.

    `detail` must be 'file' or 'residue' and `do_flips` must be a bool (both
    are asserted).  `pdb_code` falls back to 'N/A' when it is empty or None.
    `hklmtz_file` is optional: it is passed to load_model_and_data only when
    it is set, and data are not required (require_data=False).
    """
    assert detail in ['file','residue'],detail
    assert type(do_flips) == bool
    self.pdb_file = pdb_file
    self.hklmtz_file = hklmtz_file
    self.detail = detail
    self.pdb_code = pdb_code
    self.high_resolution = high_resolution
    self.do_flips = do_flips
    if not pdb_code : self.pdb_code = 'N/A'
    # The hierarchy is read directly from the file here; load_model_and_data
    # below processes the same file again.
    pdb_in = file_reader.any_file(pdb_file)
    self.hierarchy = pdb_in.file_object.hierarchy
    args = [self.pdb_file]
    if self.hklmtz_file : args.append(self.hklmtz_file)
    self.cmdline = load_model_and_data(
        args=args,
        master_phil=generate_master_phil_with_inputs(""),
        require_data=False,
        create_fmodel=True,
        process_pdb_file=True,
        prefer_anomalous=True)
    # keys are res ids and values are MDBResidue objects.
    if self.detail == 'residue' :
        self.initiate_residues()
    # NOTE(review): assumed to run unconditionally; the nesting of this call
    # was not recoverable from the reviewed source - confirm.
    self.set_mdb_document(mdb_document)
def load_all_models_in_directory(dir_name,
        limit_extensions=True,
        recursive=False):
    """
    Read every model file found in dir_name.

    With limit_extensions, files whose guessed type is not "pdb" are skipped
    without being opened.  With recursive, sub-directories are searched too.
    Returns a list of (full path, file object) tuples for the files detected
    as type "pdb".
    """
    from iotbx.file_reader import any_file, guess_file_type
    assert os.path.isdir(dir_name)
    models = []
    for entry in os.listdir(dir_name):
        full_path = os.path.join(dir_name, entry)
        if os.path.isdir(full_path) and recursive:
            models.extend(load_all_models_in_directory(
                dir_name=full_path,
                limit_extensions=limit_extensions,
                recursive=True))
            continue
        if not os.path.isfile(full_path):
            continue
        if (limit_extensions) and (guess_file_type(full_path) != "pdb"):
            continue
        input_file = any_file(full_path,
            raise_sorry_if_not_expected_format=True)
        if (input_file.file_type == "pdb"):
            models.append((full_path, input_file.file_object))
    return models
def exercise():
    """Test em_rscc on a map calculated from the 1yjp model itself.

    The ATOM records of the built-in 1yjp model are written to a file,
    electron-scattering structure factors are turned into a sigma-scaled map
    written in CCP4 format, and every output line containing " A " must
    report a third-column value within 0.1 of 1.
    """
    import mmtbx.regression
    from iotbx import file_reader
    from cStringIO import StringIO
    pdb_file = "tmp_em_rscc.pdb"
    map_file = "tmp_em_rscc.map"
    # The with-block closes the file even if a write fails.
    with open(pdb_file, "w") as pdb_out:
        for line in mmtbx.regression.model_1yjp.splitlines():
            if line.startswith("ATOM"):
                pdb_out.write(line + "\n")
    pdb_in = file_reader.any_file(pdb_file).file_object
    symm = crystal.symmetry(
        space_group_symbol="P1",
        unit_cell=(30, 30, 30, 90, 90, 90))
    xrs = pdb_in.input.xray_structure_simple(crystal_symmetry=symm)
    xrs.scattering_type_registry(
        d_min=3.0,
        table="electron")
    fc = xrs.structure_factors(d_min=3.0).f_calc()
    # 1.0/3 equals 1/3 under true division but does not silently become 0
    # when the module-level __future__ division import is missing.
    fft_map = fc.fft_map(resolution_factor=1.0/3).apply_sigma_scaling()
    i, j, k = fft_map.n_real()
    # Distinct names: the original reused `f` for the file and this integer.
    half = i//2
    last = i//2-1
    print("%s %s %s %s %s" % (i, j, k, half, last))
    fft_map.as_ccp4_map(
        file_name=map_file,
        gridding_first=(-half,-half,-half),
        gridding_last=(last,last,last))
    out = StringIO()
    em_rscc.run(args=[pdb_file, map_file], out=out)
    for line in out.getvalue().splitlines():
        if line.find(" A ")==-1:
            continue
        assert abs(float(line.split()[2])-1)<0.1
def exercise_intensity_output():
    """Test phenix.fmodel with obs_type=intensities.

    With label=Imodel the run must write an MTZ file whose first array is an
    intensity array.  Without that label, and with format=cns, the run must
    raise Sorry.
    """
    # Remove the output of this test too: previously only
    # tst_fmodel_anomalous.mtz was removed, so a stale
    # tst_fmodel_intensity.mtz could satisfy the isfile assertion below.
    for stale in ("tst_fmodel_anomalous.mtz", "tst_fmodel_intensity.mtz"):
        if (os.path.isfile(stale)):
            os.remove(stale)
    pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(
        file_base="tst_fmodel_anomalous")
    # phenix.fmodel (with wavelength)
    args = [
        pdb_file,
        "high_resolution=1.0",
        "wavelength=1.116",
        "obs_type=intensities",
        "type=real",
        "output.file_name=tst_fmodel_intensity.mtz",
        "r_free_flags_fraction=0.1",
    ]
    args2 = args + ["label=Imodel"]
    fmodel.run(args=args2, log=null_out())
    assert os.path.isfile("tst_fmodel_intensity.mtz")
    mtz_in = file_reader.any_file("tst_fmodel_intensity.mtz")
    assert mtz_in.file_server.miller_arrays[0].is_xray_intensity_array()
    # Both argument sets below are expected to be rejected.
    for bad_args in (args, args+["format=cns"]):
        try:
            fmodel.run(args=bad_args, log=null_out())
        except Sorry:
            pass
        else:
            raise Exception_expected
def exercise():
    """Test anomalous output of phenix.fmodel, then run mmtbx.fmodel_simple.

    The MTZ file written by fmodel must hold an anomalous array whose largest
    anomalous difference is 5.72 (+/- 0.01).
    """
    output_mtz = "tst_fmodel_anomalous.mtz"
    # Remove stale output so the isfile assertion below is meaningful.
    if (os.path.isfile(output_mtz)):
        os.remove(output_mtz)
    pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(
        file_base="tst_fmodel_anomalous")
    # phenix.fmodel (with wavelength)
    fmodel.run(
        args=[
            pdb_file,
            "high_resolution=1.0",
            "wavelength=1.116",
            "label=F",
            "type=real",
            "output.file_name=tst_fmodel_anomalous.mtz",
            "r_free_flags_fraction=0.1",
        ],
        log=null_out())
    assert os.path.isfile(output_mtz)
    array = file_reader.any_file(output_mtz).file_server.miller_arrays[0]
    assert (array.anomalous_flag())
    anom_diffs = array.anomalous_differences()
    assert approx_equal(flex.max(anom_diffs.data()), 5.72, eps=0.01)
    # mmtbx.fmodel_simple
    # NOTE(review): the value returned by easy_run.call is not checked, so a
    # failing command would go unnoticed - confirm whether that is intended.
    command = (
        "mmtbx.fmodel_simple \"%s\" tst_fmodel_anomalous.mtz high_resolution=2.0"
        % pdb_file)
    result = easy_run.call(command)
    print("OK")
def run(args=(), params=None, out=sys.stdout):
    """Collect the input sequences for a MUSCLE alignment (visible part only).

    `params` is required.  Files named *.pdb, *.ent, *.pdb.gz or *.ent.gz are
    read as models and contribute one padded sequence per protein chain of
    the first model, named "<file base name>_<chain id>".  All other files
    are parsed with any_sequence_format.

    Raises Sorry for a model without protein chains or an unparseable
    sequence file.
    """
    assert (params is not None)
    seq_files = params.muscle.seq_file
    output_file = params.muscle.output_file
    if (output_file is None) or (output_file == ""):
        output_file = os.path.join(os.getcwd(), "muscle.aln")
    from iotbx import file_reader
    from iotbx.bioinformatics import any_sequence_format, sequence
    model_extensions = (".pdb", ".ent", ".pdb.gz", ".ent.gz")
    seqs = []
    for file_name in seq_files:
        if not file_name.endswith(model_extensions):
            try:
                seq_objects, non_compliant = any_sequence_format(file_name,
                    assign_name_if_not_defined=True)
                seqs.extend(seq_objects)
            except Exception as e:
                raise Sorry(("Error parsing '%s' - not a recognizable sequence "+
                    "format.  (Original message: %s)") % (file_name, str(e)))
            continue
        pdb_in = file_reader.any_file(file_name, force_type="pdb").file_object
        first_model = pdb_in.hierarchy.models()[0]
        n_before = len(seqs)
        for chain in first_model.chains():
            if not chain.is_protein():
                continue
            chain_seq = chain.as_padded_sequence()
            base_name = os.path.basename(file_name)
            seq_name = "%s_%s" % (os.path.splitext(base_name)[0], chain.id)
            seqs.append(sequence(chain_seq, seq_name))
        # Nothing was appended, so the model had no protein chain.
        if (len(seqs) == n_before):
            raise Sorry(("The PDB file %s does not contain any recognizable "+
                "protein chains.") % file_name)
def prepare_inputs(prefix="tst_build_alt_confs"):
    """Write the input files for the alternate-conformation building test.

    Writes pdb_raw to "<prefix>_in.pdb", generates "<prefix>.mtz" from it
    with phenix.fmodel, then strips the model down to a single conformation
    with full occupancy and writes it to "<prefix>_start.pdb".
    """
    pdb_in = "%s_in.pdb" % prefix
    # Close the file explicitly before fmodel.run reads it.
    with open(pdb_in, "w") as f:
        f.write(pdb_raw)
    args = [
        pdb_in,
        "high_resolution=1.2",
        "type=real",
        "label=F",
        "add_sigmas=True",
        "r_free_flags_fraction=0.1",
        "random_seed=12345",
        "output.file_name=%s.mtz" % prefix,
    ]
    fmodel.run(args=args, log=null_out())
    pdb_file = file_reader.any_file(pdb_in)
    hierarchy = pdb_file.file_object.hierarchy
    xrs = pdb_file.file_object.xray_structure_simple()
    for chain in hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if (len(atom_groups) > 1):
                # Keep only the first atom group, then reset its occupancies
                # and altloc.
                # NOTE(review): the occupancy/altloc reset is assumed to sit
                # inside this `if`; the nesting was not recoverable from the
                # reviewed source - confirm.
                while (len(atom_groups) > 1):
                    residue_group.remove_atom_group(atom_groups[-1])
                    del atom_groups[-1]
                for atom in residue_group.atoms():
                    atom.occ = 1.0
                atom_groups[0].altloc = ''
    assert hierarchy.atoms().extract_occ().all_eq(1.0)
    with open("%s_start.pdb" % prefix, "w") as f:
        f.write(hierarchy.as_pdb_string(crystal_symmetry=xrs))
def extract_labels(params, out, parameter_scope="structure"):
    """
    Fill in missing data and R-free labels for every entry of params.structure.

    For each structure lacking data_labels or r_free_flags_label, the MTZ
    file is opened and its file server is asked to pick the array; the
    chosen label string is stored back into the parameters.  Raises Sorry
    when a structure has no MTZ file.
    """
    for i, structure in enumerate(params.structure):
        if (structure.mtz_file is None):
            raise Sorry("Missing MTZ file for structure #%d." % (i+1))
        if None not in [structure.data_labels, structure.r_free_flags_label]:
            continue
        mtz_file = file_reader.any_file(structure.mtz_file, force_type="hkl")
        mtz_file.assert_file_type("hkl")
        server = mtz_file.file_server
        file_name = mtz_file.file_name
        if (structure.data_labels is None):
            print >>out, "Attempting to guess labels for %s..." % file_name
            data = server.get_xray_data(
                file_name=file_name,
                labels=None,
                ignore_all_zeros=True,
                parameter_scope=parameter_scope,
                parameter_name="data_labels")
            structure.data_labels = data.info().label_string()
        if (structure.r_free_flags_label is None):
            print >>out, "Attempting to guess R-free label for %s..." % file_name
            flags_and_value = server.get_r_free_flags(
                file_name=file_name,
                label=None,
                test_flag_value=None,
                disable_suitability_test=False,
                parameter_scope=parameter_scope+".r_free_flags")
            flags = flags_and_value[0]
            structure.r_free_flags_label = flags.info().label_string()
def run (args=(), params=None, out=sys.stdout) :
  """
  Entry point for adding conformations to a model (visible portion only).

  NOTE(review): the text of this function available here stops after the
  command-line arguments have been collected into user_phil; whatever
  consumes user_phil / pdb_in is not shown, so nothing is documented about it.

  :param args: command-line arguments (file names and phil assignments)
  :param params: pre-extracted parameters; when given, args are not parsed
  :param out: output stream (not used in the visible portion)
  :raises Usage: when neither args nor params are supplied
  :raises Sorry: for a file of unsupported type or an unparseable argument
  """
  if (len(args) == 0) and (params is None) :
    raise Usage("iotbx.pdb.add_conformations model.pdb [selection=...]\n"+
      "Full parameters:\n" + master_phil.as_str())
  from iotbx import file_reader
  pdb_in = None
  if (params is None) :
    user_phil = []
    interpreter = master_phil.command_line_argument_interpreter(
      home_scope="")
    for arg in args :
      if os.path.isfile(arg) :
        # Existing files are classified by any_file: a PDB file becomes the
        # input model, a phil file is added to the parameter sources.
        f = file_reader.any_file(os.path.abspath(arg))
        if (f.file_type == "pdb") :
          pdb_in = f.file_object
          user_phil.append(libtbx.phil.parse(
            "add_conformations.pdb_file=\"%s\"" % f.file_name))
        elif (f.file_type == "phil") :
          user_phil.append(f.file_object)
        else :
          raise Sorry("Unknown file type '%s' (%s)" % (f.file_type, arg))
      else :
        # Anything that is not a file is treated as a phil assignment.
        try :
          arg_phil = interpreter.process(arg=arg)
        except RuntimeError, e :
          raise Sorry("Error parsing '%s': %s" % (arg, str(e)))
        else :
          user_phil.append(arg_phil)
def get_inputs(args, log, master_params, validated): inputs = mmtbx.utils.process_command_line_args( args=args, master_params=master_params, suppress_symmetry_related_errors=True) params = inputs.params.extract() print params.model_file_name # Check model file if (len(inputs.pdb_file_names) == 0 and (params.model_file_name is None)): raise Sorry("No model file found.") elif (len(inputs.pdb_file_names) == 1): params.model_file_name = inputs.pdb_file_names[0] elif (len(inputs.pdb_file_names) > 1): #else: raise Sorry("Only one model file should be given") # # Check reflection file(s) reflection_files = inputs.reflection_files if (len(reflection_files) == 0): if (params.reflection_file_name is None): raise Sorry("No reflection file found.") else: hkl_in = file_reader.any_file(params.reflection_file_name, force_type="hkl") hkl_in.assert_file_type("hkl") reflection_files = [hkl_in.file_object] # # Get crystal symmetry crystal_symmetry = None crystal_symmetry = inputs.crystal_symmetry if (crystal_symmetry is None): crystal_symmetry = obtain_cs_if_gui_input( model_file_name=params.model_file_name, reflection_file_name=params.reflection_file_name) print >> log, "Working crystal symmetry after inspecting all inputs:" crystal_symmetry.show_summary(f=log, prefix=" ") # # Get data labels f_obs, r_free_flags = None, None rfs = reflection_file_utils.reflection_file_server( crystal_symmetry=crystal_symmetry, force_symmetry=True, reflection_files=reflection_files, err=StringIO()) parameters = mmtbx.utils.data_and_flags_master_params().extract() if (params.data_labels is not None): parameters.labels = params.data_labels if (params.r_free_flags_labels is not None): parameters.r_free_flags.label = params.r_free_flags_labels determined_data_and_flags = mmtbx.utils.determine_data_and_flags( reflection_file_server=rfs, parameters=parameters, keep_going=True, working_point_group=crystal_symmetry.space_group( ).build_derived_point_group(), log=StringIO(), symmetry_safety_check=True) f_obs 
= determined_data_and_flags.f_obs if (params.data_labels is None): params.data_labels = f_obs.info().label_string() if (params.reflection_file_name is None): params.reflection_file_name = parameters.file_name r_free_flags = determined_data_and_flags.r_free_flags assert f_obs is not None print >> log, "Input data:" print >> log, " Iobs or Fobs:", f_obs.info().labels if (r_free_flags is not None): print >> log, " Free-R flags:", r_free_flags.info().labels params.r_free_flags_labels = r_free_flags.info().label_string() else: print >> log, " Free-R flags: Not present" model_basename = os.path.basename(params.model_file_name.split(".")[0]) if (len(model_basename) > 0 and params.output_file_name_prefix is None): params.output_file_name_prefix = model_basename new_params = master_params.format(python_object=params) print >> log, "*" * 79 new_params.show() if (not validated): validate_params(params) pdb_input = iotbx.pdb.input(file_name=params.model_file_name) pdb_hierarchy = pdb_input.construct_hierarchy() xray_structure = pdb_hierarchy.extract_xray_structure( crystal_symmetry=crystal_symmetry) # DON'T USE: # xray_structure = pdb_input.xray_structure_simple() # because the atom order might be wrong mmtbx.utils.setup_scattering_dictionaries( scattering_table=params.scattering_table, xray_structure=xray_structure, d_min=f_obs.d_min()) f_obs = f_obs.resolution_filter(d_min=params.high_resolution, d_max=params.low_resolution) if (r_free_flags is not None): r_free_flags = r_free_flags.resolution_filter( d_min=params.high_resolution, d_max=params.low_resolution) # # If data are anomalous if (f_obs.anomalous_flag()): f_obs, r_free_flags = prepare_f_obs_and_flags( f_obs=f_obs, r_free_flags=r_free_flags) return group_args(f_obs=f_obs, r_free_flags=r_free_flags, xray_structure=xray_structure, pdb_hierarchy=pdb_hierarchy, params=params)
def run(args, log=sys.stdout):
  """
  Build an f_model from a model plus reflection data and export it as MTZ.

  :param args: command-line arguments (model, reflection file, phil)
  :param log: stream receiving the legend / defaults output
  :returns: the name of the file written, or None when called without
    arguments (only the legend and defaults are shown in that case)
  :raises Sorry: missing reflection file, model or crystal symmetry,
    multi-model input, or non-isomorphous unit cells
  """
  if (len(args) == 0):
    print(legend, file=log)
    defaults(log=log)
    return
  #
  parsed = defaults(log=log)
  processed_args = mmtbx.utils.process_command_line_args(
    args=args, log=sys.stdout, master_params=parsed)
  params = processed_args.params.extract()
  reflection_files = processed_args.reflection_files
  if (len(reflection_files) == 0):
    if (params.hkl_file is None):
      raise Sorry("No reflection file found.")
    else:
      hkl_in = file_reader.any_file(params.hkl_file, force_type="hkl")
      hkl_in.assert_file_type("hkl")
      reflection_files = [hkl_in.file_object]
  crystal_symmetry = processed_args.crystal_symmetry
  if (crystal_symmetry is None):
    if (params.space_group is not None) and (params.unit_cell is not None):
      from cctbx import crystal
      crystal_symmetry = crystal.symmetry(
        space_group_info=params.space_group,
        unit_cell=params.unit_cell)
    else:
      raise Sorry("No crystal symmetry found.")
  if (len(processed_args.pdb_file_names) == 0):
    if (params.pdb_file is None):
      raise Sorry("No model file found.")
    else:
      pdb_file_names = [params.pdb_file]
  else:
    pdb_file_names = processed_args.pdb_file_names
  #
  rfs = reflection_file_utils.reflection_file_server(
    crystal_symmetry=crystal_symmetry,
    force_symmetry=True,
    reflection_files=reflection_files,
    err=StringIO())
  parameters = mmtbx.utils.data_and_flags_master_params().extract()
  parameters.labels = params.f_obs_label
  parameters.r_free_flags.label = params.r_free_flags_label
  determine_data_and_flags_result = mmtbx.utils.determine_data_and_flags(
    reflection_file_server=rfs,
    parameters=parameters,
    keep_going=True,
    log=StringIO())
  f_obs = determine_data_and_flags_result.f_obs
  print("Input data:")
  print(" Iobs or Fobs:", f_obs.info().labels)
  r_free_flags = determine_data_and_flags_result.r_free_flags
  # r_free_flags may be None (an all-False array is substituted below), so it
  # must not be dereferenced unconditionally when reporting.
  if (r_free_flags is not None):
    print(" Free-R flags:", r_free_flags.info().labels)
  else:
    print(" Free-R flags: Not present")
  #
  parameters = mmtbx.utils.experimental_phases_params.extract()
  parameters.labels = params.hendrickson_lattman_coefficients_label
  experimental_phases_result = mmtbx.utils.determine_experimental_phases(
    reflection_file_server=rfs,
    parameters=parameters,
    log=StringIO(),
    parameter_scope="",
    working_point_group=None,
    symmetry_safety_check=True,
    ignore_all_zeros=True)
  if (experimental_phases_result is not None):
    print(" HL coefficients:", experimental_phases_result.info().labels)
  # NOTE(review): called unconditionally; presumably the helper passes a
  # None input through - confirm against extract_experimental_phases.
  experimental_phases = extract_experimental_phases(
    experimental_phases=experimental_phases_result,
    f_obs=f_obs)
  #
  if (r_free_flags is None):
    r_free_flags = f_obs.array(data=flex.bool(f_obs.data().size(), False))
  #
  pdb_inp = mmtbx.utils.pdb_inp_from_multiple_files(pdb_file_names,
    log=sys.stdout)
  model = mmtbx.model.manager(
    model_input=pdb_inp,
    process_input=False,
    crystal_symmetry=crystal_symmetry,
    log=sys.stdout)
  if (model.get_number_of_models() > 1):  #XXX support multi-models
    raise Sorry("Multiple model file not supported in this tool.")
  # XXX Twining not supported
  xray_structure = model.get_xray_structure()
  if (not xray_structure.unit_cell().is_similar_to(f_obs.unit_cell())):
    raise Sorry(
      "The unit cells in the model and reflections files are not " +
      "isomorphous.")
  print("Input model:")
  print(" number of atoms:", xray_structure.scatterers().size())
  fmodel = mmtbx.f_model.manager(
    xray_structure=xray_structure,
    r_free_flags=r_free_flags,
    f_obs=f_obs,
    abcd=experimental_phases)
  fmodel.update_all_scales(
    update_f_part1=True,
    remove_outliers=params.remove_f_obs_outliers,
    bulk_solvent_and_scaling=params.bulk_solvent_and_scaling)
  print("Overall statistics:")
  fmodel.info().show_all()
  #
  print("Output data:")
  if (params.output_file_name is not None):
    output_file_name = params.output_file_name
  else:
    pdb_file_bn = os.path.basename(pdb_file_names[0])
    hkl_file_bn = os.path.basename(reflection_files[0].file_name())
    # Text before the first dot, or the whole name when there is no dot.
    pdb_file_prefix = pdb_file_bn.partition(".")[0]
    hkl_file_prefix = hkl_file_bn.partition(".")[0]
    output_file_name = "%s_%s.mtz" % (pdb_file_prefix, hkl_file_prefix)
  print(" file name:", output_file_name)
  print(" to see the contnt of %s:" % output_file_name)
  print(" phenix.mtz.dump %s" % output_file_name)
  # Context manager closes the handle even if the export raises.
  with open(output_file_name, "w") as out:
    fmodel.export(out=out)
  print("All done.")
  return output_file_name
elif sg == 'C121': rot0 = rt_mx("x,y,z") rot1 = rt_mx("-x,y,-z") rot2 = rt_mx("x+1/2,y+1/2,z") rot3 = rt_mx("-x+1/2,y+1/2,-z") rt_mx_matrices = (rot0, rot1, rot2, rot3) else: print "%s not found\n" % sg sys.exit() return rt_mx_matrices pdb_in = file_reader.any_file(pdb_file).file_object pdb_hierarchy = pdb_in.construct_hierarchy() xrs = pdb_in.xray_structure_simple() rt_mx_matrices = get_symm(sg) unit_cell = xrs.unit_cell() import cctbx from cctbx import uctbx uc1 = cctbx.uctbx.unit_cell(parameters=uc) #~ print uc1.parameters() #~ uc1.show_parameters() #~ print uc1.volume() symm = pdb_in.crystal_symmetry() space_group = symm.space_group()
def run(args, out=sys.stdout, auto_extract_labels=True,
    use_current_directory_if_not_specified=False, warn=True):
  """
  Command-line driver for phenix.table_one.

  :param args: command-line arguments (phil/PDB/reflection/text files,
    phil assignments, "--flag" switches, "unmerged_data=<path>")
  :param out: output stream
  :param auto_extract_labels: call extract_labels on the parameters
  :param use_current_directory_if_not_specified: default the output
    directory to os.getcwd() when it is None
  :param warn: print the configuration note and pause for two seconds
  :returns: the table_one object, or None when called without arguments
  :raises Sorry: PDB file given together with pre-defined structures, or
    PDB file given without a reflection file
  """
  master_params = libtbx.phil.parse(master_phil_str, process_includes=True)
  if (len(args) == 0):
    print("""\
************************************************************************
phenix.table_one - statistics harvesting for publication
************************************************************************
note: this is somewhat difficult to configure on the command line at
present; you may find it more convenient to use the PHENIX GUI.
""", file=out)
    print("# Parameter template for phenix.table_one:", file=out)
    master_params.show(out=out)
    print("# (the 'structure' scope may be copied as many times as ",
      file=out)
    print("# necessary to handle multiple datasets.)", file=out)
    print("# Alternate usage:", file=out)
    print("# phenix.table_one model.pdb data.mtz [logfile]*", file=out)
    return None
  if (warn):
    print("""
note: this is somewhat difficult to configure on the command line at
present; you may find it more convenient to use the PHENIX GUI.
""", file=out)
    time.sleep(2)
  interpreter = libtbx.phil.command_line.argument_interpreter(
    master_phil=master_params,
    home_scope="table_one")
  file_phil = []
  cmdline_phil = []
  pdb_file = None
  mtz_file = None
  unmerged_data = None
  # NOTE(review): text files are collected here but never forwarded to the
  # new structure below - confirm whether that is intended.
  log_files = []
  for arg in args :
    if os.path.isfile(arg):
      f = file_reader.any_file(arg)
      if (f.file_type == "phil"):
        file_phil.append(f.file_object)
      elif (f.file_type == "pdb"):
        pdb_file = f.file_name
      elif (f.file_type == "hkl"):
        mtz_file = f.file_name
      elif (f.file_type == "txt"):
        log_files.append(f.file_name)
    else :
      if arg.startswith("unmerged_data="):
        # Everything after the first "=" is the path.
        unmerged_data = os.path.abspath(arg.split("=", 1)[1])
        continue
      if arg.startswith("--"):
        arg = arg[2:] + "=True"
      try :
        arg_phil = interpreter.process(arg=arg)
      except RuntimeError :
        print("Ignoring unknown argument %s" % arg, file=out)
      else :
        cmdline_phil.append(arg_phil)
  working_phil = master_params.fetch(sources=file_phil+cmdline_phil)
  params = working_phil.extract()
  if (pdb_file is not None):
    if (len(params.table_one.structure) > 0):
      raise Sorry("You already have a structure defined in the parameter "+
        "file; to add structures, you should edit the parameters instead of "+
        "specifying additional PDB and data files on the command line.")
    if (mtz_file is None):
      raise Sorry("You have supplied a PDB file, but no corresponding MTZ "+
        "file.")
    structure_params = libtbx.phil.parse(structure_params_str)
    new_structure = structure_params.extract().structure[0]
    new_structure.pdb_file = pdb_file
    new_structure.mtz_file = mtz_file
    new_structure.unmerged_data = unmerged_data
    params.table_one.structure.append(new_structure)
  if auto_extract_labels :
    extract_labels(params.table_one, out=out)
  if use_current_directory_if_not_specified :
    if (params.table_one.output.directory is None):
      params.table_one.output.directory = os.getcwd()
  validate_params(params)
  if (params.table_one.multiprocessing.nproc is None):
    params.table_one.multiprocessing.nproc = 1
  final_phil = master_params.format(python_object=params)
  if params.table_one.output.verbose :
    print("", file=out)
    print("#Final effective parameters:", file=out)
    final_phil.show(out=out)
    print("#---end", file=out)
    print("", file=out)
  # Close the parameter file deterministically instead of leaking the handle.
  with open("table_one.eff", "w") as eff_out :
    final_phil.show(out=eff_out)
  table1 = table_one(params.table_one, out=out)
  easy_pickle.dump("%s.pkl" % params.table_one.output.base_name, table1)
  table1.save_multiple(
    file_base=params.table_one.output.base_name,
    formats=params.table_one.output.format)
  return table1
def __init__(self, pdb_file, output_file=None, log=None, quiet=False,
    set_se_occ=True, remove_atoms_with_zero_occupancy=False):
  """
  Filter a PDB file and, if anything changed, write the cleaned model.

  Atoms matching "element X or resname UNX or resname UNL" are always
  removed.  Zero-occupancy atoms are reported and optionally removed.  SE
  atoms of MSE residues with occupancy 1.0 are optionally set to 0.99.
  Counts are stored as self.n_unknown, self.n_semet and self.n_zero_occ.

  :param pdb_file: input model file name
  :param output_file: output file name; defaults to pdb_file (overwrite)
  :param log: stream for warnings; null_out() when None
  :param quiet: suppress per-atom messages
  :param set_se_occ: lower full-occupancy MSE selenium occupancies to 0.99
  :param remove_atoms_with_zero_occupancy: drop zero-occupancy atoms
  :raises Sorry: for multi-MODEL input
  """
  from iotbx.file_reader import any_file
  import iotbx.pdb
  if (log is None):
    log = null_out()
  pdb_in = any_file(pdb_file, force_type="pdb")
  pdb_in.assert_file_type("pdb")
  hierarchy = pdb_in.file_object.hierarchy
  if (len(hierarchy.models()) > 1):
    raise Sorry("Multi-MODEL PDB files are not supported.")
  all_atoms = hierarchy.atoms()
  cache = hierarchy.atom_selection_cache()
  # resname UNK is now okay (with some restrictions)
  known_sel = cache.selection("not (element X or resname UNX or resname UNL)")
  semet_sel = cache.selection("element SE and resname MSE")
  zero_occ_sel = all_atoms.extract_occ() == 0
  self.n_unknown = known_sel.count(False)
  self.n_semet = semet_sel.count(True)
  self.n_zero_occ = zero_occ_sel.count(True)
  keep_sel = known_sel
  modified = False
  # Explicit grouping; same truth table as the original or/and chain.
  if ((self.n_unknown > 0) or
      ((self.n_semet > 0) and (set_se_occ)) or
      ((self.n_zero_occ > 0) and (remove_atoms_with_zero_occupancy))):
    modified = True
    if (output_file is None):
      output_file = pdb_file
  if (self.n_unknown > 0) and (not quiet):
    print >> log, "Warning: %d unknown atoms or ligands removed:" % \
      self.n_unknown
    for i_seq in (~known_sel).iselection():
      print >> log, " %s" % all_atoms[i_seq].id_str()
  if (self.n_zero_occ > 0):
    msg = "Warning: %d atoms with zero occupancy present in structure:"
    if (remove_atoms_with_zero_occupancy):
      msg = "Warning: %d atoms with zero occupancy removed:"
      keep_sel &= ~zero_occ_sel
    if (not quiet):
      print >> log, msg % self.n_zero_occ
      for i_seq in zero_occ_sel.iselection():
        print >> log, " %s" % all_atoms[i_seq].id_str()
  hierarchy_filtered = hierarchy.select(keep_sel)
  if (self.n_semet > 0) and (set_se_occ):
    for atom in hierarchy_filtered.atoms():
      if (atom.element == "SE") and (atom.fetch_labels().resname == "MSE"):
        if (atom.occ == 1.0):
          if (not quiet):
            print >> log, "Set occupancy of %s to 0.99" % atom.id_str()
          atom.occ = 0.99 # just enough to trigger occupancy refinement
  if (modified):
    # The context manager closes the file even when a write raises.
    with open(output_file, "w") as f:
      # if the input file is actually from the PDB, we need to preserve the
      # header information for downstream code.
      print >> f, "\n".join(pdb_in.file_object.input.title_section())
      print >> f, "\n".join(pdb_in.file_object.input.remark_section())
      print >> f, iotbx.pdb.format_cryst1_record(
        crystal_symmetry=pdb_in.file_object.crystal_symmetry())
      print >> f, hierarchy_filtered.as_pdb_string()
def run(self, args, command_name, out=sys.stdout):
    """
    Score an ensemble PDB file model-by-model.

    Exactly one PDB file is required.  A multi_out log is created (stdout
    plus "<pdb name>_pensemble.log") and sys.stderr is redirected to it.
    Two modes follow:

    * assign_sigma_from_map with ensemble_sigma_map_input set: the first
      Miller array of the given MTZ file is passed to get_map_sigma for each
      structure and the result is written with write_ensemble_pdb.
    * otherwise: an Fc map is computed per model and for the averaged Fcalc,
      per-atom values from get_map_sigma against both maps are divided
      (average / model), limited to (0, 1] and stored as occupancies; models
      can then be ranked by the summed negative log of these values.

    NOTE(review): the nesting of this method was reconstructed from
    collapsed text; confirm block boundaries against the upstream file.
    The `out` parameter is not used; output goes to self.log.
    """
    command_line = (iotbx_option_parser(
        usage="%s [options]" % command_name,
        description='Example: %s data.mtz data.mtz ref_model.pdb' %
        command_name).option(
            None,
            "--show_defaults",
            action="store_true",
            help="Show list of parameters.")).process(args=args)

    cif_file = None
    processed_args = utils.process_command_line_args(
        args=args, log=sys.stdout, master_params=master_phil)
    params = processed_args.params
    if (params is None):
        params = master_phil
    self.params = params.extract().ensemble_probability
    pdb_file_names = processed_args.pdb_file_names
    if len(pdb_file_names) != 1:
        raise Sorry("Only one PDB structure may be used")
    pdb_file = file_reader.any_file(pdb_file_names[0])
    # Log to stdout and to a buffer that is later swapped for the log file.
    self.log = multi_out()
    self.log.register(label="stdout", file_object=sys.stdout)
    self.log.register(label="log_buffer",
                      file_object=StringIO(),
                      atexit_send_to=None)
    sys.stderr = self.log
    log_file = open(
        pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
        '_pensemble.log', "w")
    self.log.replace_stringio(old_label="log_buffer",
                              new_label="log",
                              new_file_object=log_file)
    utils.print_header(command_name, out=self.log)
    params.show(out=self.log)
    #
    f_obs = None
    r_free_flags = None
    reflection_files = processed_args.reflection_files

    if self.params.fobs_vs_fcalc_post_nll:
        if len(reflection_files) == 0:
            raise Sorry(
                "Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

    if len(reflection_files) > 0:
        crystal_symmetry = processed_args.crystal_symmetry
        print('Reflection file : ',
              processed_args.reflection_file_names[0],
              file=self.log)
        utils.print_header("Model and data statistics", out=self.log)
        rfs = reflection_file_server(
            crystal_symmetry=crystal_symmetry,
            reflection_files=processed_args.reflection_files,
            log=self.log)

        parameters = extract_xtal_data.data_and_flags_master_params(
        ).extract()
        determine_data_and_flags_result = extract_xtal_data.run(
            reflection_file_server=rfs,
            parameters=parameters,
            keep_going=True)
        f_obs = determine_data_and_flags_result.f_obs
        number_of_reflections = f_obs.indices().size()
        r_free_flags = determine_data_and_flags_result.r_free_flags
        test_flag_value = determine_data_and_flags_result.test_flag_value
        # Without free-R flags, use an all-False flag array of matching size.
        if (r_free_flags is None):
            r_free_flags = f_obs.array(
                data=flex.bool(f_obs.data().size(), False))

    # process PDB
    pdb_file.assert_file_type("pdb")
    #
    pdb_in = hierarchy.input(file_name=pdb_file.file_name)
    ens_pdb_hierarchy = pdb_in.construct_hierarchy()
    ens_pdb_hierarchy.atoms().reset_i_seq()
    ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
    number_structures = len(ens_pdb_xrs_s)
    print('Number of structure in ensemble : ',
          number_structures,
          file=self.log)

    # Calculate sigmas from input map only
    if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
        # process MTZ
        input_file = file_reader.any_file(
            self.params.ensemble_sigma_map_input)
        if input_file.file_type == "hkl":
            if input_file.file_object.file_type() != "ccp4_mtz":
                raise Sorry("Only MTZ format accepted for map input")
            else:
                mtz_file = input_file
        else:
            raise Sorry("Only MTZ format accepted for map input")
        miller_arrays = mtz_file.file_server.miller_arrays
        # Only the first array of the file is used as map coefficients.
        map_coeffs_1 = miller_arrays[0]
        #
        xrs_list = []
        for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
            # get sigma levels from ensemble fc for each structure
            xrs = get_map_sigma(ens_pdb_hierarchy=ens_pdb_hierarchy,
                                ens_pdb_xrs=ens_pdb_xrs,
                                map_coeffs_1=map_coeffs_1,
                                residue_detail=self.params.residue_detail,
                                ignore_hd=self.params.ignore_hd,
                                log=self.log)
            xrs_list.append(xrs)
        # write ensemble pdb file, occupancies as sigma level
        filename = pdb_file_names[0].split('/')[-1].replace(
            '.pdb',
            '') + '_vs_' + self.params.ensemble_sigma_map_input.replace(
                '.mtz', '') + '_pensemble.pdb'
        write_ensemble_pdb(filename=filename,
                           xrs_list=xrs_list,
                           ens_pdb_hierarchy=ens_pdb_hierarchy)

    # Do full analysis vs Fobs
    else:
        model_map_coeffs = []
        fmodel = None
        # Get <fcalc>
        for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
            ens_pdb_xrs.set_occupancies(1.0)
            if model == 0:
                # The fmodel object is created once, for the first model,
                # and reused (structure swapped in) for the remaining ones.
                # If mtz not supplied get fobs from xray structure...
                # Use input Fobs for scoring against nll
                if self.params.fobs_vs_fcalc_post_nll:
                    dummy_fobs = f_obs
                else:
                    if f_obs == None:
                        if self.params.fcalc_high_resolution == None:
                            raise Sorry(
                                "Please supply high resolution limit or input mtz file."
                            )
                        dummy_dmin = self.params.fcalc_high_resolution
                        dummy_dmax = self.params.fcalc_low_resolution
                    else:
                        print(
                            'Supplied mtz used to determine high and low resolution cuttoffs',
                            file=self.log)
                        dummy_dmax, dummy_dmin = f_obs.d_max_min()
                    #
                    dummy_fobs = abs(
                        ens_pdb_xrs.structure_factors(
                            d_min=dummy_dmin).f_calc())
                    dummy_fobs.set_observation_type_xray_amplitude()
                    # If mtz supplied, free flags are over written to prevent array size error
                    r_free_flags = dummy_fobs.array(
                        data=flex.bool(dummy_fobs.data().size(), False))
                #
                fmodel = utils.fmodel_simple(
                    scattering_table="wk1995",
                    xray_structures=[ens_pdb_xrs],
                    f_obs=dummy_fobs,
                    target_name='ls',
                    bulk_solvent_and_scaling=False,
                    r_free_flags=r_free_flags)
                # Zeroed copy of f_calc; used later as a template array.
                f_calc_ave = fmodel.f_calc().array(
                    data=fmodel.f_calc().data() * 0).deep_copy()
                # XXX Important to ensure scale is identical for each model and <model>
                fmodel.set_scale_switch = 1.0
                f_calc_ave_total = fmodel.f_calc().data().deep_copy()
            else:
                fmodel.update_xray_structure(xray_structure=ens_pdb_xrs,
                                             update_f_calc=True,
                                             update_f_mask=False)
                f_calc_ave_total += fmodel.f_calc().data().deep_copy()
            print('Model :', model + 1, file=self.log)
            print("\nStructure vs real Fobs (no bulk solvent or scaling)",
                  file=self.log)
            print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
            print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
            print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)
            fcalc_edm = fmodel.electron_density_map()
            fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type='Fc')
            fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(
                column_root_label='Fc')
            if self.params.output_model_and_model_ave_mtz:
                fcalc_mtz_dataset.mtz_object().write(
                    file_name=str(model + 1) + "_Fc.mtz")
            model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

        # Replace f_calc by the mean over all models.
        fmodel.update(f_calc=f_calc_ave.array(f_calc_ave_total /
                                              number_structures))
        print("\nEnsemble vs real Fobs (no bulk solvent or scaling)",
              file=self.log)
        print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
        print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
        print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)

        # Get <Fcalc> map
        fcalc_ave_edm = fmodel.electron_density_map()
        fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(
            map_type='Fc').deep_copy()
        fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(
            column_root_label='Fc')
        if self.params.output_model_and_model_ave_mtz:
            fcalc_ave_mtz_dataset.mtz_object().write(file_name="aveFc.mtz")
        # Note: from here on the name holds the FFT map, not coefficients.
        fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
        fcalc_ave_map_coeffs.apply_volume_scaling()
        fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
        fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)

        print("<Fcalc> Map Stats :", file=self.log)
        fcalc_ave_map_stats.show_summary(f=self.log)
        offset = fcalc_ave_map_stats.min()
        model_neg_ll = []

        number_previous_scatters = 0

        # Run through structure list again and get probability
        xrs_list = []
        for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
            if self.params.verbose:
                print('\n\nModel : ', model + 1, file=self.log)
            # Get model atom sigmas vs Fcalc
            fcalc_map = model_map_coeffs[model].fft_map()
            fcalc_map.apply_volume_scaling()
            fcalc_map_data = fcalc_map.real_map_unpadded()
            fcalc_map_stats = maptbx.statistics(fcalc_map_data)
            if self.params.verbose:
                print("Fcalc map stats :", file=self.log)
                fcalc_map_stats.show_summary(f=self.log)
            xrs = get_map_sigma(
                ens_pdb_hierarchy=ens_pdb_hierarchy,
                ens_pdb_xrs=ens_pdb_xrs,
                fft_map_1=fcalc_map,
                model_i=model,
                residue_detail=self.params.residue_detail,
                ignore_hd=self.params.ignore_hd,
                number_previous_scatters=number_previous_scatters,
                log=self.log)
            fcalc_sigmas = xrs.scatterers().extract_occupancies()
            del fcalc_map
            # Get model atom sigmas vs <Fcalc>
            xrs = get_map_sigma(
                ens_pdb_hierarchy=ens_pdb_hierarchy,
                ens_pdb_xrs=ens_pdb_xrs,
                fft_map_1=fcalc_ave_map_coeffs,
                model_i=model,
                residue_detail=self.params.residue_detail,
                ignore_hd=self.params.ignore_hd,
                number_previous_scatters=number_previous_scatters,
                log=self.log)
            ### For testing other residue averaging options
            #print xrs.residue_selections
            fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
            # Probability of model given <model>
            prob = fcalc_ave_sigmas / fcalc_sigmas
            # XXX debug option
            if False:
                for n, p in enumerate(prob):
                    print(' {0:5d} {1:5.3f}'.format(n, p), file=self.log)
            # Set probability between 0 and 1
            # (values <= 0 become 0.001, values > 1 become 1.0)
            # XXX Make Histogram / more stats
            prob_lss_zero = flex.bool(prob <= 0)
            prob_grt_one = flex.bool(prob > 1)
            prob.set_selected(prob_lss_zero, 0.001)
            prob.set_selected(prob_grt_one, 1.0)
            xrs.set_occupancies(prob)
            xrs_list.append(xrs)
            sum_neg_ll = sum(-flex.log(prob))
            model_neg_ll.append((sum_neg_ll, model))
            if self.params.verbose:
                print('Model probability stats :', file=self.log)
                print(prob.min_max_mean().show(), file=self.log)
                print(' Count < 0.0 : ',
                      prob_lss_zero.count(True),
                      file=self.log)
                print(' Count > 1.0 : ',
                      prob_grt_one.count(True),
                      file=self.log)

            # For averaging by residue
            number_previous_scatters += ens_pdb_xrs.sites_cart().size()

        # write ensemble pdb file, occupancies as sigma level
        write_ensemble_pdb(
            filename=pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
            '_pensemble.pdb',
            xrs_list=xrs_list,
            ens_pdb_hierarchy=ens_pdb_hierarchy)

        # XXX Test ordering models by nll
        # XXX Test removing nth percentile atoms
        if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
            for percentile in [1.0, 0.975, 0.95, 0.9, 0.8, 0.6, 0.2]:
                # Tuples are (summed negative log, model index): sorting
                # puts the lowest sum first.
                model_neg_ll = sorted(model_neg_ll)
                f_calc_ave_total_reordered = None
                print_list = []
                for i_neg_ll in model_neg_ll:
                    xrs = xrs_list[i_neg_ll[1]]
                    nll_occ = xrs.scatterers().extract_occupancies()

                    # Set q=0 nth percentile atoms
                    sorted_nll_occ = sorted(nll_occ, reverse=True)
                    number_atoms = len(sorted_nll_occ)
                    percentile_prob_cutoff = sorted_nll_occ[
                        int(number_atoms * percentile) - 1]
                    cutoff_selections = flex.bool(
                        nll_occ < percentile_prob_cutoff)
                    cutoff_nll_occ = flex.double(nll_occ.size(),
                                                 1.0).set_selected(
                                                     cutoff_selections, 0.0)
                    #XXX Debug
                    if False:
                        print('\nDebug')
                        for x in range(len(cutoff_selections)):
                            print(cutoff_selections[x], nll_occ[x],
                                  cutoff_nll_occ[x])
                        print(percentile)
                        print(percentile_prob_cutoff)
                        print(cutoff_selections.count(True))
                        print(cutoff_selections.size())
                        print(cutoff_nll_occ.count(0.0))
                        print('Count q = 1 : ', cutoff_nll_occ.count(1.0))
                        print('Count scatterers size : ',
                              cutoff_nll_occ.size())

                    xrs.set_occupancies(cutoff_nll_occ)
                    fmodel.update_xray_structure(xray_structure=xrs,
                                                 update_f_calc=True,
                                                 update_f_mask=True)

                    # Running sums over the models processed so far.
                    if f_calc_ave_total_reordered == None:
                        f_calc_ave_total_reordered = fmodel.f_calc().data(
                        ).deep_copy()
                        f_mask_ave_total_reordered = fmodel.f_masks(
                        )[0].data().deep_copy()
                        cntr = 1
                    else:
                        f_calc_ave_total_reordered += fmodel.f_calc().data(
                        ).deep_copy()
                        f_mask_ave_total_reordered += fmodel.f_masks(
                        )[0].data().deep_copy()
                        cntr += 1
                    fmodel.update(
                        f_calc=f_calc_ave.array(
                            f_calc_ave_total_reordered / cntr).deep_copy(),
                        f_mask=f_calc_ave.array(
                            f_mask_ave_total_reordered / cntr).deep_copy())

                    # Update solvent and scale
                    # XXX Will need to apply_back_trace on latest version
                    fmodel.set_scale_switch = 0
                    fmodel.update_all_scales()

                    # Reset occ for output
                    xrs.set_occupancies(nll_occ)
                    # k1 updated vs Fobs
                    if self.params.fobs_vs_fcalc_post_nll:
                        print_list.append([
                            cntr, i_neg_ll[0], i_neg_ll[1],
                            fmodel.r_work(),
                            fmodel.r_free()
                        ])

                # Order models by nll and print summary
                print(
                    '\nModels ranked by nll <Fcalc> R-factors recalculated',
                    file=self.log)
                print('Percentile cutoff : {0:5.3f}'.format(percentile),
                      file=self.log)
                xrs_list_sorted_nll = []
                print(' | NLL <Rw> <Rf> Ens Model', file=self.log)
                for info in print_list:
                    print(' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.
                          format(
                              info[0],
                              info[1],
                              info[3],
                              info[4],
                              info[2] + 1,
                          ),
                          file=self.log)
                    xrs_list_sorted_nll.append(xrs_list[info[2]])

            # Output nll ordered ensemble
            write_ensemble_pdb(
                filename='nll_ordered_' +
                pdb_file_names[0].split('/')[-1].replace('.pdb', '') +
                '_pensemble.pdb',
                xrs_list=xrs_list_sorted_nll,
                ens_pdb_hierarchy=ens_pdb_hierarchy)
def run2(args, log=sys.stdout):
  """
  Download model files and/or reflection data for one or more PDB IDs.

  Parameters
  ----------
  args : list of str
    Option flags and PDB IDs; anything that is not a known flag is an ID.
  log : file, optional

  Returns
  -------
  str or list of str
    Downloaded file name(s).  A single name is returned only when one ID
    was requested with a single data type.
  """
  if len(args) < 1:
    raise Usage("""\
phenix.fetch_pdb [-x|-f|--all] [--mtz] [-q] ID1 [ID2, ...]
Command-line options:
-x Get structure factors (mmCIF file)
-c Get model file in mmCIF format
-f Get sequence (FASTA file)
--all Download all available data
--mtz Download structure factors and PDB file, and generate MTZ
-q suppress printed output
""")
  from iotbx.pdb.fetch import get_pdb
  # Flags that do nothing except select the kind of data to download.
  data_type_flags = {"--all": "all", "-x": "xray", "-f": "fasta"}
  quiet = False
  convert_to_mtz = False
  data_type = "pdb"
  format = "pdb"
  mirror = "rcsb"
  ids = []
  for arg in args:
    if arg in data_type_flags:
      data_type = data_type_flags[arg]
    elif (arg == "-q"):
      quiet = True
    elif (arg == "--mtz"):
      convert_to_mtz = True
      data_type = "all"
    elif (arg == "-c"):
      format = "cif"
    elif (arg.startswith("--mirror=")):
      mirror = arg.split("=")[1]
      if mirror not in ["rcsb", "pdbe", "pdbj"]:
        raise Sorry(
          "Unrecognized mirror site '%s' (choices: rcsb, pdbe, pdbj)" %
          mirror)
    else:
      ids.append(arg)
  if (len(ids) == 0):
    raise Sorry("No PDB IDs specified.")

  if (data_type != "all"):
    # One file per ID.
    fetched = [get_pdb(pdb_id, data_type, mirror, log, format=format)
               for pdb_id in ids]
    if (len(fetched) == 1):
      return fetched[0]
    return fetched

  # Everything available for each ID, optionally followed by MTZ conversion.
  everything = [("pdb", "pdb"), ("fasta", "pdb"), ("xray", "pdb"),
                ("pdb", "cif")]
  fetched = []
  for pdb_id in ids:
    for data_type_, data_format in everything:
      fetched.append(
        get_pdb(pdb_id, data_type_, mirror, log, format=data_format))
    if (not convert_to_mtz):
      continue
    misc_args = [
      "--merge", "--map_to_asu", "--extend_flags", "--ignore_bad_sigmas"
    ]
    easy_run.call("phenix.cif_as_mtz %s-sf.cif %s" %
                  (pdb_id, " ".join(misc_args)))
    mtz_name = "%s.mtz" % pdb_id
    if os.path.isfile("%s-sf.mtz" % pdb_id):
      os.rename("%s-sf.mtz" % pdb_id, mtz_name)
      print >> log, "Converted structure factors saved to %s.mtz" % pdb_id
    # os.remove("%s-sf.cif" % id)
    # The most recently appended entry is replaced by the MTZ path.
    fetched[-1] = os.path.abspath(mtz_name)
    if (not os.path.isfile(mtz_name)):
      raise Sorry(
        "MTZ conversion failed - try running phenix.cif_as_mtz " +
        "manually (and check %s-sf.cif for format errors)." % pdb_id)
    from iotbx.file_reader import any_file
    mtz_in = any_file(mtz_name)
    mtz_in.assert_file_type("hkl")
    for array in mtz_in.file_server.miller_arrays:
      if (array.anomalous_flag()):
        print >> log, " %s is anomalous" % array.info(
        ).label_string()
  return fetched
def __init__(self, mtz_file, pdb_file, wilson_b=None, data_label=None,
    n_resolution_bins=20, n_intensity_bins=20, out=None):
  """
  Build per-resolution-shell noise bins from a reference data set.

  The first amplitude or intensity array of mtz_file (matching data_label
  when given) is converted to intensities, which must carry sigmas.  When
  both wilson_b and pdb_file are supplied, the data are rescaled so that
  their isotropic Wilson B equals wilson_b.  Intensities and sigmas are
  normalised by the largest intensity, binned by resolution, and one
  shell_intensity_bins object per shell is stored in self._resolution_bins.

  :param mtz_file: reflection file with the reference data
  :param pdb_file: model used for residue/base counts in Wilson scaling
  :param wilson_b: target Wilson B-factor, or None to skip the correction
  :param data_label: label string of the array to use, or None for any
  :param n_resolution_bins: number of resolution shells
  :param n_intensity_bins: number of intensity bins within each shell
  :param out: output stream; sys.stdout when None
  """
  if (out is None):
    out = sys.stdout
  if (wilson_b is None) or (pdb_file is None):
    print("""\
WARNING: missing desired Wilson B-factor and/or PDB file for noise profile
data. Without this information the intensity falloff with resolution will
probably not be the same for your synthetic data and the data used to
generate sigmas.
""", file=out)
  self._resolution_bins = []
  from iotbx.file_reader import any_file
  from scitbx.array_family import flex
  f = any_file(mtz_file, force_type="hkl")
  f.assert_file_type("hkl")
  miller_arrays = f.file_server.miller_arrays
  f_obs = None
  i_obs = None
  # Keep the first matching amplitude array and first matching intensity
  # array; intensities take precedence below.
  for array in miller_arrays:
    if (array.info().label_string() == data_label) or (data_label is None):
      if (array.is_xray_amplitude_array()) and (f_obs is None):
        f_obs = array
      elif (array.is_xray_intensity_array()) and (i_obs is None):
        i_obs = array
  if (i_obs is None):
    assert (f_obs is not None) and (f_obs.sigmas() is not None)
    i_obs = f_obs.f_as_f_sq()
  assert (i_obs.sigmas() is not None)
  if (wilson_b is not None) and (pdb_file is not None):
    print(" Correcting reference data intensity falloff...", file=out)
    f_obs = i_obs.f_sq_as_f()
    pdb_hierarchy = any_file(pdb_file).file_object.hierarchy
    n_residues, n_bases = get_counts(pdb_hierarchy)
    iso_scale, aniso_scale = wilson_scaling(
      F=f_obs,
      n_residues=n_residues,
      n_bases=n_bases)
    # TODO anisotropic?
    print(" Scaling statistics for unmodified reference data:", file=out)
    show_b_factor_info(iso_scale, aniso_scale, out=out)
    delta_b = wilson_b - iso_scale.b_wilson
    f_obs = f_obs.apply_debye_waller_factors(b_iso=delta_b)
    i_obs = f_obs.f_as_f_sq()
  # Normalise by the largest intensity in the data set.
  i_max = flex.max(i_obs.data())
  i_norm = i_obs.customized_copy(
    data=i_obs.data() / i_max,
    sigmas=i_obs.sigmas() / i_max)
  # Honour the n_resolution_bins argument (previously hard-coded to 20,
  # which is still the default).
  i_norm.setup_binner(n_bins=n_resolution_bins)
  i_over_sigma = i_obs.data() / i_obs.sigmas()
  for i_bin in i_norm.binner().range_used():
    sel = i_norm.binner().selection(i_bin)
    i_shell = i_norm.select(sel)
    sn_shell = i_over_sigma.select(sel)
    noise_bins = shell_intensity_bins(
      i_norm=i_shell,
      i_over_sigma=sn_shell,
      n_bins=n_intensity_bins)
    self._resolution_bins.append(noise_bins)
def __init__(self, params, hkl_in=None, pdb_in=None, out=sys.stdout):
    """Load the inputs, simulate low-resolution data and write the result.

    Steps performed, in order: read the PDB and/or reflection file named in
    ``params`` unless already-parsed objects were passed in; obtain the raw
    amplitudes (from the reflection file if present, otherwise from the
    model); optionally import R-free flags; truncate the data; optionally
    add noise; print summary statistics; call ``self.write_output()``.

    :param params: extracted parameter scope (reads ``pdb_file``,
      ``hkl_file``, ``d_min``, ``r_free_flags`` and ``generate_noise``)
    :param hkl_in: pre-parsed reflection file object (optional)
    :param pdb_in: pre-parsed PDB file object (optional)
    :param out: stream that receives all log output
    :raises Sorry: if neither a PDB nor a reflection file is specified, or
      if noise is requested without a reflection file or noise profile file
    """
    # Must stay the first statement: it receives locals(), so any local bound
    # before this call would be passed along too.  Presumably it stores each
    # constructor argument on self (self.hkl_in / self.pdb_in are read below).
    adopt_init_args(self, locals())
    self.params = params
    self.out = out
    self.pdb_hierarchy = None

    if (params.pdb_file is None) and (params.hkl_file is None):
        raise Sorry("No PDB file specified.")
    if (params.generate_noise.add_noise) and (params.hkl_file is None):
        if (params.generate_noise.noise_profile_file is None):
            raise Sorry(
                "noise_profile_file required when add_noise=True and "
                "hkl_file is undefined.")

    if (pdb_in is None) and (params.pdb_file is not None):
        f = file_reader.any_file(params.pdb_file, force_type="pdb")
        f.assert_file_type("pdb")
        self.pdb_in = f.file_object
    if (self.hkl_in is None) and (params.hkl_file is not None):
        # Fixed: the attribute is 'hkl_file' (it was misspelled 'hkl_File',
        # which raised AttributeError whenever this branch was taken).
        f = file_reader.any_file(params.hkl_file, force_type="hkl")
        f.assert_file_type("hkl")
        self.hkl_in = f.file_object
    if (self.pdb_in is not None):
        self.pdb_hierarchy = self.pdb_in.hierarchy

    # Experimental data take precedence over model-derived data.  Headers go
    # to 'out' like every other message (they used to go to sys.stdout).
    if (self.hkl_in is not None):
        make_header("Extracting experimental data", out=out)
        f_raw, r_free = self.from_hkl()
    elif (self.pdb_in is not None):
        make_header("Generating fake data with phenix.fmodel", out=out)
        f_raw, r_free = self.from_pdb()
    if (params.r_free_flags.file_name is not None):
        f_raw, r_free = self.import_r_free_flags(f_raw)
    self.r_free = r_free

    make_header("Applying low-resolution filtering", out=out)
    print(" Target resolution: %.2f A" % params.d_min, file=out)
    self.n_residues, self.n_bases = None, None
    if (self.pdb_in is not None):
        self.n_residues, self.n_bases = get_counts(self.pdb_hierarchy)
    #if (params.auto_adjust):
    #  if (pdb_in is None):
    #    raise Sorry("You must supply a PDB file when auto_adjust=True.")
    self.f_out = self.truncate_data(f_raw)

    if (params.generate_noise.add_noise):
        make_header("Adding noise using sigma profile", out=out)
        if (self.f_out.sigmas() is None):
            # Without a model there are no residue counts for Wilson scaling,
            # so no target B-factor is available (previously this path hit an
            # unbound 'iso_scale').
            # NOTE(review): assumes create_sigmas accepts wilson_b=None -
            # confirm against its definition.
            wilson_b = None
            if (self.pdb_in is not None):
                iso_scale, aniso_scale = wilson_scaling(
                    self.f_out, self.n_residues, self.n_bases)
                wilson_b = iso_scale.b_wilson
            i_obs = create_sigmas(
                f_obs=self.f_out,
                params=params.generate_noise,
                wilson_b=wilson_b,
                return_as_amplitudes=False)
        else:
            # The data already carry sigmas, so use them directly (previously
            # 'i_obs' was unbound on this path).
            # NOTE(review): assumes apply_sigma_noise expects intensities with
            # sigmas, as on the branch above - confirm.
            i_obs = self.f_out.f_as_f_sq()
        apply_sigma_noise(i_obs)
        self.f_out = i_obs.f_sq_as_f()

    make_header("Done processing", out=out)
    print(" Completeness after processing: %.2f%%" %
          (self.f_out.completeness() * 100.), file=out)
    print(" Final resolution: %.2f A" % self.f_out.d_min(), file=out)
    if (self.pdb_in is not None):
        iso_scale, aniso_scale = wilson_scaling(
            self.f_out, self.n_residues, self.n_bases)
        print("", file=out)
        print(" Scaling statistics for output data:", file=out)
        show_b_factor_info(iso_scale, aniso_scale, out=out)
        print("", file=out)
    self.write_output()
def run(args, out=None):
    """Command-line entry point for mmtbx.simulate_low_res_data.

    Prints the banner and usage text, then interprets each argument: an
    existing file is classified by ``file_reader.any_file`` as a PDB file,
    a reflection file or a parameter file; anything else is parsed as a
    ``name=value`` parameter.  The merged parameters are shown and passed
    to ``prepare_data``.

    :param args: list of command-line argument strings
    :param out: output stream for all messages (defaults to sys.stdout)
    :returns: None
    """
    if out is None:
        out = sys.stdout
    make_header("mmtbx.simulate_low_res_data", out=out)
    print("""
 For generation of realistic data (model-based, or using real
 high-resolution data) for methods development.

 *********************************** WARNING: ***********************************
 this is an experimental program - definitely NOT bug-free.
 Use at your own risk!

 Usage:
 mmtbx.simulate_low_res_data model.pdb [options...]
 (generate data from a PDB file)

 mmtbx.simulate_low_res_data highres.mtz [model.pdb] [options...]
 (truncate high-resolution data)

 mmtbx.simulate_low_res_data --help
 (print full parameters with additional info)
""", file=out)
    if (len(args) == 0) or ("--help" in args):
        print("# full parameters:", file=out)
        # Send the parameter listing to the same stream as everything else
        # (it previously always went to sys.stdout).
        if "--help" in args:
            master_phil.show(out=out, attributes_level=1)
        else:
            master_phil.show(out=out)
        return

    from iotbx import file_reader
    interpreter = master_phil.command_line_argument_interpreter(
        home_scope="simulate_data")
    pdb_in = None
    hkl_in = None
    user_phil = []
    for arg in args:
        if os.path.isfile(arg):
            f = file_reader.any_file(arg)
            if f.file_type == "pdb":
                pdb_in = f.file_object
                user_phil.append(
                    interpreter.process(arg="pdb_file=%s" % f.file_name))
            elif f.file_type == "hkl":
                hkl_in = f.file_object
                user_phil.append(
                    interpreter.process(arg="hkl_file=%s" % f.file_name))
            elif f.file_type == "phil":
                user_phil.append(f.file_object)
            # Files of any other type are ignored.
        else:
            try:
                arg_phil = interpreter.process(arg=arg)
            except RuntimeError:
                print("ignoring uninterpretable argument '%s'" % arg,
                      file=out)
            else:
                user_phil.append(arg_phil)

    working_phil = master_phil.fetch(sources=user_phil)
    make_header("Working parameters", out=out)
    working_phil.show(out=out, prefix=" ")
    params = working_phil.extract().simulate_data
    prepare_data(params=params, hkl_in=hkl_in, pdb_in=pdb_in, out=out)