# Wilson-plot regression exercise for one space group.
#
# Builds a structure with random_structure.xray_structure from 20 elements
# (N, C, C, O repeated five times, isotropic u_iso = adptbx.b_as_u(10)) and
# computes its structure factors with the "direct" algorithm for the given
# anomalous_flag and d_min. The amplitudes are binned using
# reflections_per_bin / n_bins, rescaled by each k_given in
# [1, 0.1, 0.01, 10, 100] and passed to statistics.wilson_plot with
# e_statistics=True. The asserts require wilson_k within 0.8-1.2 of k_given,
# wilson_intensity_scale_factor within 0.64-1.44 of k_given squared,
# 9 < wilson_b < 11, and E-statistics inside separate ranges for centric and
# non-centric space groups.
#
# An all-zero f_obs array is also passed to statistics.wilson_plot; when that
# raises RuntimeError the message is compared with the expected text through
# show_diff.
#
# verbose: when truthy, structure/binner summaries and the fitted Wilson
# values are printed.
#
# NOTE(review): there is no else branch on the try statement, so a
# wilson_plot call that does not raise is not reported as a failure.
# NOTE(review): this block uses Python 2-only syntax (print statement,
# "except RuntimeError, e").
# NOTE(review): the line structure of this copy is lost and the
# expected-message string literal spans a physical line break; restore the
# layout from version control before editing the code itself.
def exercise(space_group_info, anomalous_flag, d_min=1.0, reflections_per_bin=200, n_bins=10, verbose=0): elements = ("N", "C", "C", "O") * 5 structure_factors = random_structure.xray_structure( space_group_info, elements=elements, volume_per_atom=50., min_distance=1.5, general_positions_only=True, use_u_aniso=False, u_iso=adptbx.b_as_u(10) ).structure_factors( anomalous_flag=anomalous_flag, d_min=d_min, algorithm="direct") if (0 or verbose): structure_factors.xray_structure().show_summary() asu_contents = dicts.with_default_value(0) for elem in elements: asu_contents[elem] += 1 f_calc = abs(structure_factors.f_calc()) f_calc.setup_binner( auto_binning=True, reflections_per_bin=reflections_per_bin, n_bins=n_bins) if (0 or verbose): f_calc.binner().show_summary() for k_given in [1,0.1,0.01,10,100]: f_obs = miller.array( miller_set=f_calc, data=f_calc.data()*k_given).set_observation_type_xray_amplitude() f_obs.use_binner_of(f_calc) wp = statistics.wilson_plot(f_obs, asu_contents, e_statistics=True) if (0 or verbose): print "wilson_k, wilson_b:", wp.wilson_k, wp.wilson_b print "space group:", space_group_info.group().type().hall_symbol() print "<E^2-1>:", wp.mean_e_sq_minus_1 assert 0.8 < wp.wilson_k/k_given < 1.2 assert 0.64 < wp.wilson_intensity_scale_factor/(k_given*k_given) < 1.44 assert 9 < wp.wilson_b < 11 assert wp.xy_plot_info().fit_correlation == wp.fit_correlation if space_group_info.group().is_centric(): assert 0.90 < wp.mean_e_sq_minus_1 < 1.16 assert 3.15 < wp.percent_e_sq_gt_2 < 6.5 else: assert 0.65 < wp.mean_e_sq_minus_1 < 0.90 assert 1.0 < wp.percent_e_sq_gt_2 < 3.15 assert wp.normalised_f_obs.size() == f_obs.size() f_obs = f_calc.array(data=flex.double(f_calc.indices().size(), 0)) f_obs.use_binner_of(f_calc) n_bins = f_obs.binner().n_bins_used() try: statistics.wilson_plot(f_obs, asu_contents) except RuntimeError, e: assert not show_diff(str(e), """\ wilson_plot error: %d empty bins: Number of bins: %d Number of f_obs > 0: 0 Number of f_obs <= 0: 
%d""" % (n_bins, n_bins, f_obs.indices().size()))
def exercise():
    """Exercise the monomer library server and print diagnostic listings.

    Flags read from ``sys.argv[1:]``:

    * ``--verbose``: print each component id while it is processed.
    * ``--quick``: skip a component whenever ``random.random() < 0.95`` in
      the library-wide loops.
    * ``--default_off``: switch every section off; only the server is set
      up and ``OK`` is printed.
    * ``--pickle``: dump the server object to ``mon_lib.pickle``.

    Raises:
        AssertionError: if a standard amino acid cannot be loaded or its
            group is not ``L-peptide``.
    """
    args = sys.argv[1:]
    verbose = "--verbose" in args
    quick = "--quick" in args
    list_cif = server.mon_lib_list_cif()
    srv = server.server(list_cif=list_cif)
    print("srv.root_path:", srv.root_path)
    # Each section is guarded by "False or default_switch"; presumably the
    # literal False is a manual toggle for forcing one section on.
    default_switch = "--default_off" not in args

    # Section: atom-id frequency table and monomers whose atom ids
    # contain commas.
    if (False or default_switch):
        monomers_with_commas = {}
        atom_id_counts = dicts.with_default_value(0)
        for row_id in list_cif.cif["comp_list"]["_chem_comp.id"]:
            if (quick and random.random() < 0.95):
                continue
            if (verbose):
                print("id:", row_id)
            comp_comp_id = srv.get_comp_comp_id_direct(comp_id=row_id)
            if (comp_comp_id is None):
                print("Could not instantiating comp_comp_id(%s)" % row_id)
                continue
            has_primes = False
            has_commas = False
            for atom in comp_comp_id.atom_list:
                atom_id_counts[atom.atom_id] += 1
                if "'" in atom.atom_id:
                    has_primes = True
                if "," in atom.atom_id:
                    has_commas = True
            if has_commas:
                monomers_with_commas[comp_comp_id.chem_comp.id] = has_primes
        print(monomers_with_commas)
        atom_ids = flex.std_string(list(atom_id_counts.keys()))
        counts = flex.size_t(list(atom_id_counts.values()))
        # Most frequent atom ids first.
        perm = flex.sort_permutation(data=counts, reverse=True)
        atom_ids = atom_ids.select(perm)
        counts = counts.select(perm)
        for atom_id, count in zip(atom_ids, counts):
            print(atom_id, count)

    # Section: compare every list entry with the component loaded for it.
    if (False or default_switch):
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if (quick and random.random() < 0.95):
                continue
            if (verbose):
                print("id:", row["_chem_comp.id"])
            comp_comp_id = srv.get_comp_comp_id_direct(
                comp_id=row["_chem_comp.id"])
            check_chem_comp(cif_types.chem_comp(**row), comp_comp_id)
        # NOTE(review): nesting inferred from a whitespace-collapsed copy;
        # confirm the dump belongs inside this section.
        if ("--pickle" in args):
            easy_pickle.dump("mon_lib.pickle", srv)

    # Sections: show a component before and after applying a modification.
    if (False or default_switch):
        comp = srv.get_comp_comp_id_direct("GLY")
        comp.show()
        mod = srv.mod_mod_id_dict["COO"]
        comp.apply_mod(mod).show()
    if (False or default_switch):
        comp = srv.get_comp_comp_id_direct("LYS")
        comp.show()
        mod = srv.mod_mod_id_dict["B2C"]
        comp.apply_mod(mod).show()

    # Section: cross-check the computed classification against the group
    # recorded in the component list.
    if (False or default_switch):
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if (quick and random.random() < 0.95):
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if (comp_comp_id is None):
                continue
            group = row["_chem_comp.group"]
            if (comp_comp_id.classification == "peptide"):
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name, end=' ')
                print(group, end=' ')
                grp = group.lower().strip()
                if (grp not in ("l-peptide", "d-peptide", "polymer")):
                    print("LOOK", end=' ')
                    #if (not os.path.isdir("look")): os.makedirs("look")
                    #open("look/%s.cif" % row["_chem_comp.id"], "w").write(
                      #open(comp_comp_id.file_name).read())
                print()
            elif ("peptide" in group.lower()
                  or "peptide" in comp_comp_id.chem_comp.group.lower()):
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name, end=' ')
                print(group, "MISMATCH")
            if (comp_comp_id.classification in ("RNA", "DNA")):
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name, end=' ')
                print(group, end=' ')
                if (comp_comp_id.classification != group.strip()):
                    print(comp_comp_id.classification, "MISMATCH", end=' ')
                print()
            # The group strings are lower-cased before the search, so the
            # needle must be lower-case too; an upper-case "NA" could never
            # match and left this branch unreachable.
            elif ("na" in group.lower()
                  or "na" in comp_comp_id.chem_comp.group.lower()):
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name, end=' ')
                print(group, "MISMATCH")

    # Section: report planes whose atoms do not all share dist_esd "0.02".
    if (False or default_switch):
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if (quick and random.random() < 0.95):
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if (comp_comp_id is None):
                continue
            for plane in comp_comp_id.get_planes():
                dist_esd_dict = {}
                for plane_atom in plane.plane_atoms:
                    dist_esd_dict[str(plane_atom.dist_esd)] = 0
                # Comparing the key list as a whole covers both "not
                # exactly one distinct value" and "wrong value" without
                # indexing a keys() view.
                dist_esds = list(dist_esd_dict)
                if (dist_esds != ["0.02"]):
                    print(comp_comp_id.chem_comp.id, plane.plane_id, end=' ')
                    print(dist_esds)

    # Section: list torsions and chiralities of the standard amino acids.
    if (False or default_switch):
        standard_amino_acids = [
            "GLY", "VAL", "ALA", "LEU", "ILE", "PRO", "MET", "PHE", "TRP",
            "SER", "THR", "TYR", "CYS", "ASN", "GLN", "ASP", "GLU", "LYS",
            "ARG", "HIS",
        ]
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            is_standard_aa = row["_chem_comp.id"] in standard_amino_acids
            if (1 and not is_standard_aa):
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if (is_standard_aa):
                assert comp_comp_id is not None
                assert comp_comp_id.chem_comp.group.strip() == "L-peptide"
            if (comp_comp_id is not None):
                print(comp_comp_id.chem_comp.id.strip(), end=' ')
                print(comp_comp_id.chem_comp.name.strip(), end=' ')
                print(comp_comp_id.chem_comp.group.strip())
                for tor in comp_comp_id.tor_list:
                    print(" tor:", tor.atom_id_1, tor.atom_id_2, end=' ')
                    print(tor.atom_id_3, tor.atom_id_4, tor.value_angle,
                          end=' ')
                    print(tor.value_angle_esd, tor.period)
                for chir in comp_comp_id.chir_list:
                    print(" chir:", chir.atom_id_centre, chir.atom_id_1,
                          end=' ')
                    print(chir.atom_id_2, chir.atom_id_3, chir.volume_sign)

    # Section: dump the energy library synonyms and van der Waals entries.
    if (False or default_switch):
        elib = server.ener_lib()
        if (False or default_switch):
            for syn in elib.lib_synonym.items():
                print(syn)
        if (False or default_switch):
            for vdw in elib.lib_vdw:
                vdw.show()
    print("OK")
def calculate_cell_content(xray_structure):
    """Accumulate occupancy * multiplicity for every scattering type.

    Args:
        xray_structure: object whose ``scatterers()`` yields items exposing
            ``scattering_type``, ``occupancy`` and ``multiplicity()``.

    Returns:
        A ``dicts.with_default_value(0)`` mapping keyed by scattering type;
        each value is the sum of ``occupancy * multiplicity()`` over the
        scatterers of that type.
    """
    cell_content = dicts.with_default_value(0)
    for scatterer in xray_structure.scatterers():
        contribution = scatterer.occupancy * scatterer.multiplicity()
        cell_content[scatterer.scattering_type] += contribution
    return cell_content
def run():
    """Match pickled structures against a reference model with EMMA.

    Expects exactly two positional arguments in ``sys.argv[1:]``: the pickle
    of the reference structure and the pickle of one structure or a
    list/tuple of structures. If the reference pickle holds a list or tuple,
    its first element is used.

    Options:
        --tolerance: float, default 3; passed to ``emma.model_matches``.
        --match_hydrogens: bool, default True; when false, the 'H' element
            selection is removed from the reference and from every
            structure before matching.

    For each structure the pair count of the first refined match (0 when
    there is none) is recorded, and a histogram of those counts with
    per-count and cumulative percentages is printed at the end.

    Exits with status 1 after printing the help text when the number of
    positional arguments is not two.
    """
    command_line = (
        option_parser(
            usage="usage: cctbx.euclidean_model_matching [OPTIONS] "
                  "reference_structure.pickle structure.pickle",
            description="")
        .option("--tolerance", type="float", default=3)
        .option("--match_hydrogens", type='bool', default=True)
    ).process(args=sys.argv[1:])
    if len(command_line.args) != 2:
        command_line.parser.print_help()
        sys.exit(1)

    # NOTE(security): easy_pickle.load presumably unpickles the file, and
    # unpickling can execute arbitrary code - only pass trusted files.
    reference_structure = easy_pickle.load(command_line.args[0])
    if isinstance(reference_structure, (list, tuple)):
        reference_structure = reference_structure[0]
    structures = easy_pickle.load(command_line.args[1])
    if not isinstance(structures, (list, tuple)):
        structures = [structures]

    if not command_line.options.match_hydrogens:
        reference_structure.select_inplace(
            ~reference_structure.element_selection('H'))
        for structure in structures:
            structure.select_inplace(~structure.element_selection('H'))

    # Only single-argument print() calls are used below: they produce the
    # same output under the Python 2 print statement and the Python 3
    # print function.
    print("Reference model:")
    reference_structure.show_summary()
    print("")
    reference_model = reference_structure.as_emma_model()

    match_list = []
    match_histogram = dicts.with_default_value(0)
    for structure in structures:
        structure.show_summary()
        if hasattr(structure, "info"):
            print(structure.info)
        print("")
        sys.stdout.flush()
        refined_matches = emma.model_matches(
            reference_model,
            structure.as_emma_model(),
            tolerance=command_line.options.tolerance,
            models_are_diffraction_index_equivalent=False,
            break_if_match_with_no_singles=True).refined_matches
        if len(refined_matches) > 0:
            refined_matches[0].show()
            n_pairs = len(refined_matches[0].pairs)
        else:
            print("No matches")
            n_pairs = 0
        match_list.append(
            match_record(n_pairs, structure.scatterers().size()))
        match_histogram[n_pairs] += 1
        print("")
        sys.stdout.flush()

    print("match_list: %s" % (match_list,))
    # sorted() works on both Python 2 and 3; dict.keys() has no sort()
    # method on Python 3.
    pair_counts = sorted(match_histogram, reverse=True)
    print("matches: frequency")
    total = sum(match_histogram.values())
    cumulative = 0
    for pair_count in pair_counts:
        frequency = match_histogram[pair_count]
        cumulative += frequency
        print(" %3d: %3d = %5.1f%%, %5.1f%%" % (
            pair_count, frequency,
            100. * frequency / total, 100. * cumulative / total))
    print("")
    sys.stdout.flush()
def run():
    """Match pickled structures against a reference model with EMMA.

    Expects exactly two positional arguments in ``sys.argv[1:]``: the pickle
    of the reference structure and the pickle of one structure or a
    list/tuple of structures. If the reference pickle holds a list or tuple,
    its first element is used.

    Options:
        --tolerance: float, default 3; passed to ``emma.model_matches``.
        --match_hydrogens: bool, default True; when false, the 'H' element
            selection is removed from the reference and from every
            structure before matching.

    For each structure the pair count of the first refined match (0 when
    there is none) is recorded, and a histogram of those counts with
    per-count and cumulative percentages is printed at the end.

    Exits with status 1 after printing the help text when the number of
    positional arguments is not two.
    """
    command_line = (
        option_parser(
            usage="usage: cctbx.euclidean_model_matching [OPTIONS] "
                  "reference_structure.pickle structure.pickle",
            description="")
        .option("--tolerance", type="float", default=3)
        .option("--match_hydrogens", type='bool', default=True)
    ).process(args=sys.argv[1:])
    if len(command_line.args) != 2:
        command_line.parser.print_help()
        sys.exit(1)

    # NOTE(security): easy_pickle.load presumably unpickles the file, and
    # unpickling can execute arbitrary code - only pass trusted files.
    reference_structure = easy_pickle.load(command_line.args[0])
    if isinstance(reference_structure, (list, tuple)):
        reference_structure = reference_structure[0]
    structures = easy_pickle.load(command_line.args[1])
    if not isinstance(structures, (list, tuple)):
        structures = [structures]

    if not command_line.options.match_hydrogens:
        reference_structure.select_inplace(
            ~reference_structure.element_selection('H'))
        for structure in structures:
            structure.select_inplace(~structure.element_selection('H'))

    # Only single-argument print() calls are used below: they produce the
    # same output under the Python 2 print statement and the Python 3
    # print function.
    print("Reference model:")
    reference_structure.show_summary()
    print("")
    reference_model = reference_structure.as_emma_model()

    match_list = []
    match_histogram = dicts.with_default_value(0)
    for structure in structures:
        structure.show_summary()
        if hasattr(structure, "info"):
            print(structure.info)
        print("")
        sys.stdout.flush()
        refined_matches = emma.model_matches(
            reference_model,
            structure.as_emma_model(),
            tolerance=command_line.options.tolerance,
            models_are_diffraction_index_equivalent=False,
            break_if_match_with_no_singles=True).refined_matches
        if len(refined_matches) > 0:
            refined_matches[0].show()
            n_pairs = len(refined_matches[0].pairs)
        else:
            print("No matches")
            n_pairs = 0
        match_list.append(
            match_record(n_pairs, structure.scatterers().size()))
        match_histogram[n_pairs] += 1
        print("")
        sys.stdout.flush()

    print("match_list: %s" % (match_list,))
    # sorted() works on both Python 2 and 3; dict.keys() has no sort()
    # method on Python 3.
    pair_counts = sorted(match_histogram, reverse=True)
    print("matches: frequency")
    total = sum(match_histogram.values())
    cumulative = 0
    for pair_count in pair_counts:
        frequency = match_histogram[pair_count]
        cumulative += frequency
        print(" %3d: %3d = %5.1f%%, %5.1f%%" % (
            pair_count, frequency,
            100. * frequency / total, 100. * cumulative / total))
    print("")
    sys.stdout.flush()
def exercise():
    """Exercise the monomer library server and print diagnostic listings.

    Flags read from ``sys.argv[1:]``:

    * ``--verbose``: print each component id while it is processed.
    * ``--quick``: skip a component whenever ``random.random() < 0.95`` in
      the library-wide loops.
    * ``--default_off``: switch every section off; only the server is set
      up and ``OK`` is printed.
    * ``--pickle``: dump the server object to ``mon_lib.pickle``.

    Raises:
        AssertionError: if a standard amino acid cannot be loaded or its
            group is not ``L-peptide``.
    """
    def show(*fields):
        # Space-join the fields the way the Python 2 print statement did.
        # A single-argument print() call gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(" ".join([str(field) for field in fields]))

    args = sys.argv[1:]
    verbose = "--verbose" in args
    quick = "--quick" in args
    list_cif = server.mon_lib_list_cif()
    srv = server.server(list_cif=list_cif)
    show("srv.root_path:", srv.root_path)
    # Each section is guarded by "False or default_switch"; presumably the
    # literal False is a manual toggle for forcing one section on.
    default_switch = "--default_off" not in args

    # Section: atom-id frequency table and monomers whose atom ids
    # contain commas.
    if (False or default_switch):
        monomers_with_commas = {}
        atom_id_counts = dicts.with_default_value(0)
        for row_id in list_cif.cif["comp_list"]["_chem_comp.id"]:
            if (quick and random.random() < 0.95):
                continue
            if (verbose):
                show("id:", row_id)
            comp_comp_id = srv.get_comp_comp_id_direct(comp_id=row_id)
            if (comp_comp_id is None):
                print("Error instantiating comp_comp_id(%s)" % row_id)
                continue
            has_primes = False
            has_commas = False
            for atom in comp_comp_id.atom_list:
                atom_id_counts[atom.atom_id] += 1
                if "'" in atom.atom_id:
                    has_primes = True
                if "," in atom.atom_id:
                    has_commas = True
            if has_commas:
                monomers_with_commas[comp_comp_id.chem_comp.id] = has_primes
        print(monomers_with_commas)
        # list(...) keeps these calls valid when keys()/values() return
        # views instead of lists.
        atom_ids = flex.std_string(list(atom_id_counts.keys()))
        counts = flex.size_t(list(atom_id_counts.values()))
        # Most frequent atom ids first.
        perm = flex.sort_permutation(data=counts, reverse=True)
        atom_ids = atom_ids.select(perm)
        counts = counts.select(perm)
        for atom_id, count in zip(atom_ids, counts):
            show(atom_id, count)

    # Section: compare every list entry with the component loaded for it.
    if (False or default_switch):
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if (quick and random.random() < 0.95):
                continue
            if (verbose):
                show("id:", row["_chem_comp.id"])
            comp_comp_id = srv.get_comp_comp_id_direct(
                comp_id=row["_chem_comp.id"])
            check_chem_comp(cif_types.chem_comp(**row), comp_comp_id)
        # NOTE(review): nesting inferred from a whitespace-collapsed copy;
        # confirm the dump belongs inside this section.
        if ("--pickle" in args):
            easy_pickle.dump("mon_lib.pickle", srv)

    # Sections: show a component before and after applying a modification.
    if (False or default_switch):
        comp = srv.get_comp_comp_id_direct("GLY")
        comp.show()
        mod = srv.mod_mod_id_dict["COO"]
        comp.apply_mod(mod).show()
    if (False or default_switch):
        comp = srv.get_comp_comp_id_direct("LYS")
        comp.show()
        mod = srv.mod_mod_id_dict["B2C"]
        comp.apply_mod(mod).show()

    # Section: cross-check the computed classification against the group
    # recorded in the component list.
    if (False or default_switch):
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if (quick and random.random() < 0.95):
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if (comp_comp_id is None):
                continue
            group = row["_chem_comp.group"]
            if (comp_comp_id.classification == "peptide"):
                fields = [comp_comp_id.chem_comp.id,
                          comp_comp_id.chem_comp.name,
                          group]
                grp = group.lower().strip()
                if (grp not in ("l-peptide", "d-peptide", "polymer")):
                    fields.append("LOOK")
                    #if (not os.path.isdir("look")): os.makedirs("look")
                    #open("look/%s.cif" % row["_chem_comp.id"], "w").write(
                      #open(comp_comp_id.file_name).read())
                show(*fields)
            elif ("peptide" in group.lower()
                  or "peptide" in comp_comp_id.chem_comp.group.lower()):
                show(comp_comp_id.chem_comp.id, comp_comp_id.chem_comp.name,
                     group, "MISMATCH")
            if (comp_comp_id.classification in ("RNA", "DNA")):
                fields = [comp_comp_id.chem_comp.id,
                          comp_comp_id.chem_comp.name,
                          group]
                if (comp_comp_id.classification != group.strip()):
                    fields.extend([comp_comp_id.classification, "MISMATCH"])
                show(*fields)
            # The group strings are lower-cased before the search, so the
            # needle must be lower-case too; an upper-case "NA" could never
            # match and left this branch unreachable.
            elif ("na" in group.lower()
                  or "na" in comp_comp_id.chem_comp.group.lower()):
                show(comp_comp_id.chem_comp.id, comp_comp_id.chem_comp.name,
                     group, "MISMATCH")

    # Section: report planes whose atoms do not all share dist_esd "0.02".
    if (False or default_switch):
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if (quick and random.random() < 0.95):
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if (comp_comp_id is None):
                continue
            for plane in comp_comp_id.get_planes():
                dist_esd_dict = {}
                for plane_atom in plane.plane_atoms:
                    dist_esd_dict[str(plane_atom.dist_esd)] = 0
                # Comparing the key list as a whole covers both "not
                # exactly one distinct value" and "wrong value" without
                # indexing keys(), which is not subscriptable on Python 3.
                dist_esds = list(dist_esd_dict)
                if (dist_esds != ["0.02"]):
                    show(comp_comp_id.chem_comp.id, plane.plane_id, dist_esds)

    # Section: list torsions and chiralities of the standard amino acids.
    if (False or default_switch):
        standard_amino_acids = [
            "GLY", "VAL", "ALA", "LEU", "ILE", "PRO", "MET", "PHE", "TRP",
            "SER", "THR", "TYR", "CYS", "ASN", "GLN", "ASP", "GLU", "LYS",
            "ARG", "HIS",
        ]
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            is_standard_aa = row["_chem_comp.id"] in standard_amino_acids
            if (1 and not is_standard_aa):
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if (is_standard_aa):
                assert comp_comp_id is not None
                assert comp_comp_id.chem_comp.group.strip() == "L-peptide"
            if (comp_comp_id is not None):
                show(comp_comp_id.chem_comp.id.strip(),
                     comp_comp_id.chem_comp.name.strip(),
                     comp_comp_id.chem_comp.group.strip())
                for tor in comp_comp_id.tor_list:
                    show(" tor:", tor.atom_id_1, tor.atom_id_2,
                         tor.atom_id_3, tor.atom_id_4, tor.value_angle,
                         tor.value_angle_esd, tor.period)
                for chir in comp_comp_id.chir_list:
                    show(" chir:", chir.atom_id_centre, chir.atom_id_1,
                         chir.atom_id_2, chir.atom_id_3, chir.volume_sign)

    # Section: dump the energy library synonyms and van der Waals entries.
    if (False or default_switch):
        elib = server.ener_lib()
        if (False or default_switch):
            for syn in elib.lib_synonym.items():
                print(syn)
        if (False or default_switch):
            for vdw in elib.lib_vdw:
                vdw.show()
    print("OK")
# Wilson-plot regression exercise for one space group.
#
# Builds a structure with random_structure.xray_structure from 20 elements
# (N, C, C, O repeated five times, isotropic u_iso = adptbx.b_as_u(10)) and
# computes its structure factors with the "direct" algorithm for the given
# anomalous_flag and d_min. The amplitudes are binned using
# reflections_per_bin / n_bins, rescaled by each k_given in
# [1, 0.1, 0.01, 10, 100] and passed to statistics.wilson_plot with
# e_statistics=True. The asserts require wilson_k within 0.8-1.2 of k_given,
# wilson_intensity_scale_factor within 0.64-1.44 of k_given squared,
# 9 < wilson_b < 11, and E-statistics inside separate ranges for centric and
# non-centric space groups.
#
# An all-zero f_obs array is also passed to statistics.wilson_plot; when that
# raises RuntimeError the message is compared with the expected text through
# show_diff.
#
# verbose: when truthy, structure/binner summaries and the fitted Wilson
# values are printed.
#
# NOTE(review): there is no else branch on the try statement, so a
# wilson_plot call that does not raise is not reported as a failure.
# NOTE(review): this block uses Python 2-only syntax (print statement,
# "except RuntimeError, e").
# NOTE(review): the line structure of this copy is lost and the
# expected-message string literal spans a physical line break; restore the
# layout from version control before editing the code itself.
def exercise(space_group_info, anomalous_flag, d_min=1.0, reflections_per_bin=200, n_bins=10, verbose=0): elements = ("N", "C", "C", "O") * 5 structure_factors = random_structure.xray_structure( space_group_info, elements=elements, volume_per_atom=50., min_distance=1.5, general_positions_only=True, use_u_aniso=False, u_iso=adptbx.b_as_u(10)).structure_factors( anomalous_flag=anomalous_flag, d_min=d_min, algorithm="direct") if (0 or verbose): structure_factors.xray_structure().show_summary() asu_contents = dicts.with_default_value(0) for elem in elements: asu_contents[elem] += 1 f_calc = abs(structure_factors.f_calc()) f_calc.setup_binner(auto_binning=True, reflections_per_bin=reflections_per_bin, n_bins=n_bins) if (0 or verbose): f_calc.binner().show_summary() for k_given in [1, 0.1, 0.01, 10, 100]: f_obs = miller.array(miller_set=f_calc, data=f_calc.data() * k_given).set_observation_type_xray_amplitude() f_obs.use_binner_of(f_calc) wp = statistics.wilson_plot(f_obs, asu_contents, e_statistics=True) if (0 or verbose): print "wilson_k, wilson_b:", wp.wilson_k, wp.wilson_b print "space group:", space_group_info.group().type().hall_symbol() print "<E^2-1>:", wp.mean_e_sq_minus_1 assert 0.8 < wp.wilson_k / k_given < 1.2 assert 0.64 < wp.wilson_intensity_scale_factor / (k_given * k_given) < 1.44 assert 9 < wp.wilson_b < 11 assert wp.xy_plot_info().fit_correlation == wp.fit_correlation if space_group_info.group().is_centric(): assert 0.90 < wp.mean_e_sq_minus_1 < 1.16 assert 3.15 < wp.percent_e_sq_gt_2 < 6.5 else: assert 0.65 < wp.mean_e_sq_minus_1 < 0.90 assert 1.0 < wp.percent_e_sq_gt_2 < 3.15 assert wp.normalised_f_obs.size() == f_obs.size() f_obs = f_calc.array(data=flex.double(f_calc.indices().size(), 0)) f_obs.use_binner_of(f_calc) n_bins = f_obs.binner().n_bins_used() try: statistics.wilson_plot(f_obs, asu_contents) except RuntimeError, e: assert not show_diff( str(e), """\ wilson_plot error: %d empty bins: Number of bins: %d Number of f_obs > 0: 0 Number of f_obs 
<= 0: %d""" % (n_bins, n_bins, f_obs.indices().size()))