def exercise(space_group_info, anomalous_flag,
             d_min=1.0, reflections_per_bin=200, n_bins=10, verbose=0):
  """Check statistics.wilson_plot on a structure with known scale and B.

  A 20-atom N/C/C/O structure is built by random_structure.xray_structure
  with u_iso = adptbx.b_as_u(10).  Its |F_calc| values are multiplied by
  several known factors k_given and, for each, the Wilson plot results are
  asserted to lie in fixed windows (scale within 20% of k_given, B between
  9 and 11, E-statistics in centric/acentric ranges).  Finally an all-zero
  data set is submitted, which must be rejected with a RuntimeError whose
  message matches the expected text.

  Parameters:
    space_group_info: forwarded to random_structure.xray_structure; also
      queried for centricity and (verbose only) the Hall symbol.
    anomalous_flag: forwarded to structure_factors().
    d_min: resolution limit forwarded to structure_factors().
    reflections_per_bin, n_bins: forwarded to f_calc.setup_binner().
    verbose: any true value enables diagnostic output.

  Raises:
    AssertionError: if any check fails, including the case where the
      all-zero data set is accepted without a RuntimeError.
  """
  elements = ("N", "C", "C", "O") * 5
  structure_factors = random_structure.xray_structure(
    space_group_info,
    elements=elements,
    volume_per_atom=50.,
    min_distance=1.5,
    general_positions_only=True,
    use_u_aniso=False,
    u_iso=adptbx.b_as_u(10)
    ).structure_factors(
        anomalous_flag=anomalous_flag, d_min=d_min, algorithm="direct")
  if verbose:
    structure_factors.xray_structure().show_summary()
  # Count how often each element occurs; passed to wilson_plot below.
  asu_contents = dicts.with_default_value(0)
  for elem in elements:
    asu_contents[elem] += 1
  f_calc = abs(structure_factors.f_calc())
  f_calc.setup_binner(
    auto_binning=True,
    reflections_per_bin=reflections_per_bin,
    n_bins=n_bins)
  if verbose:
    f_calc.binner().show_summary()
  for k_given in [1,0.1,0.01,10,100]:
    f_obs = miller.array(
      miller_set=f_calc,
      data=f_calc.data()*k_given).set_observation_type_xray_amplitude()
    f_obs.use_binner_of(f_calc)
    wp = statistics.wilson_plot(f_obs, asu_contents, e_statistics=True)
    if verbose:
      # Single-argument print calls behave the same on Python 2 and 3.
      print("wilson_k, wilson_b: %s %s" % (wp.wilson_k, wp.wilson_b))
      print("space group: %s" % (
        space_group_info.group().type().hall_symbol(),))
      print("<E^2-1>: %s" % (wp.mean_e_sq_minus_1,))

    assert 0.8 < wp.wilson_k/k_given < 1.2
    # The intensity scale is compared against k_given squared, hence the
    # squared tolerance window (0.8**2, 1.2**2).
    assert 0.64 < wp.wilson_intensity_scale_factor/(k_given*k_given) < 1.44
    assert 9 < wp.wilson_b < 11
    assert wp.xy_plot_info().fit_correlation == wp.fit_correlation
    if space_group_info.group().is_centric():
      assert 0.90 < wp.mean_e_sq_minus_1 < 1.16
      assert 3.15 < wp.percent_e_sq_gt_2 < 6.5
    else:
      assert 0.65 < wp.mean_e_sq_minus_1 < 0.90
      assert 1.0 < wp.percent_e_sq_gt_2 < 3.15
    assert wp.normalised_f_obs.size() == f_obs.size()
  # Error path: every amplitude is zero.
  f_obs = f_calc.array(data=flex.double(f_calc.indices().size(), 0))
  f_obs.use_binner_of(f_calc)
  n_bins = f_obs.binner().n_bins_used()
  try:
    statistics.wilson_plot(f_obs, asu_contents)
  except RuntimeError as e:
    assert not show_diff(str(e), """\
wilson_plot error: %d empty bins:
  Number of bins: %d
  Number of f_obs > 0: 0
  Number of f_obs <= 0: %d""" % (n_bins, n_bins, f_obs.indices().size()))
  else:
    # Without this branch the error-path check would pass vacuously.
    raise AssertionError(
      "statistics.wilson_plot accepted all-zero f_obs without RuntimeError")
Exemple #2
0
def exercise():
    """Exercise the monomer library server and print diagnostics.

    Flags read from ``sys.argv[1:]``:

    * ``--verbose``: print each component id while it is processed.
    * ``--quick``: in the loops over the whole component list, skip each
      entry with probability 0.95.
    * ``--default_off``: disable every check section below.
    * ``--pickle``: after the ``check_chem_comp`` section, dump the server
      object to ``mon_lib.pickle``.

    Sections, in order: atom-id statistics, per-row ``check_chem_comp``,
    applying mod "COO" to GLY and mod "B2C" to LYS, peptide and nucleic-acid
    classification cross-checks, plane ``dist_esd`` uniformity, listing of
    the standard amino acids, and a dump of the energy library.  "OK" is
    printed at the end.

    Raises:
        AssertionError: if a standard amino acid cannot be loaded or its
            group is not "L-peptide".
    """
    args = sys.argv[1:]
    verbose = "--verbose" in args
    quick = "--quick" in args
    list_cif = server.mon_lib_list_cif()
    srv = server.server(list_cif=list_cif)
    print("srv.root_path:", srv.root_path)
    default_switch = "--default_off" not in args

    # Section 1: count atom ids over all components and report monomers
    # whose atom ids contain commas (value: whether they also have primes).
    if default_switch:
        monomers_with_commas = {}
        atom_id_counts = dicts.with_default_value(0)
        for row_id in list_cif.cif["comp_list"]["_chem_comp.id"]:
            if quick and random.random() < 0.95:
                continue
            if verbose:
                print("id:", row_id)
            comp_comp_id = srv.get_comp_comp_id_direct(comp_id=row_id)
            if comp_comp_id is None:
                print("Could not instantiating comp_comp_id(%s)" % row_id)
                continue
            has_primes = False
            has_commas = False
            for atom in comp_comp_id.atom_list:
                atom_id_counts[atom.atom_id] += 1
                if "'" in atom.atom_id:
                    has_primes = True
                if "," in atom.atom_id:
                    has_commas = True
            if has_commas:
                monomers_with_commas[comp_comp_id.chem_comp.id] = has_primes
        print(monomers_with_commas)
        # Print atom ids ordered by descending frequency.
        atom_ids = flex.std_string(list(atom_id_counts.keys()))
        counts = flex.size_t(list(atom_id_counts.values()))
        perm = flex.sort_permutation(data=counts, reverse=True)
        atom_ids = atom_ids.select(perm)
        counts = counts.select(perm)
        for atom_id, count in zip(atom_ids, counts):
            print(atom_id, count)

    # Section 2: compare each list row with the loaded component.
    if default_switch:
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if quick and random.random() < 0.95:
                continue
            if verbose:
                print("id:", row["_chem_comp.id"])
            comp_comp_id = srv.get_comp_comp_id_direct(
                comp_id=row["_chem_comp.id"])
            check_chem_comp(cif_types.chem_comp(**row), comp_comp_id)
        if "--pickle" in args:
            easy_pickle.dump("mon_lib.pickle", srv)

    # Sections 3 and 4: show a component before and after applying a mod.
    if default_switch:
        comp = srv.get_comp_comp_id_direct("GLY")
        comp.show()
        mod = srv.mod_mod_id_dict["COO"]
        comp.apply_mod(mod).show()
    if default_switch:
        comp = srv.get_comp_comp_id_direct("LYS")
        comp.show()
        mod = srv.mod_mod_id_dict["B2C"]
        comp.apply_mod(mod).show()

    # Section 5: cross-check classification against the declared group.
    if default_switch:
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if quick and random.random() < 0.95:
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if comp_comp_id is None:
                continue
            row_group = row["_chem_comp.group"]
            if comp_comp_id.classification == "peptide":
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name,
                      end=' ')
                print(row_group, end=' ')
                grp = row_group.lower().strip()
                if grp not in ("l-peptide", "d-peptide", "polymer"):
                    print("LOOK", end=' ')
                print()
            elif ("peptide" in row_group.lower()
                  or "peptide" in comp_comp_id.chem_comp.group.lower()):
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name,
                      end=' ')
                print(row_group, "MISMATCH")
            if comp_comp_id.classification in ("RNA", "DNA"):
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name,
                      end=' ')
                print(row_group, end=' ')
                if comp_comp_id.classification != row_group.strip():
                    print(comp_comp_id.classification, "MISMATCH", end=' ')
                print()
            # The group is upper-cased before searching for "NA"; searching
            # a lower-cased string for "NA" can never succeed, which left
            # this branch unreachable.
            elif ("NA" in row_group.upper()
                  or "NA" in comp_comp_id.chem_comp.group.upper()):
                print(comp_comp_id.chem_comp.id,
                      comp_comp_id.chem_comp.name,
                      end=' ')
                print(row_group, "MISMATCH")

    # Section 6: report planes whose atoms do not all have dist_esd "0.02".
    if default_switch:
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if quick and random.random() < 0.95:
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            if comp_comp_id is None:
                continue
            for plane in comp_comp_id.get_planes():
                # A dict is used as an insertion-ordered set of the
                # distinct dist_esd strings.
                dist_esd_dict = {}
                for plane_atom in plane.plane_atoms:
                    dist_esd_dict[str(plane_atom.dist_esd)] = 0
                esd_values = list(dist_esd_dict.keys())
                if esd_values != ["0.02"]:
                    print(comp_comp_id.chem_comp.id,
                          plane.plane_id,
                          end=' ')
                    print(esd_values)

    # Section 7: list torsions and chiralities of the standard amino acids.
    if default_switch:
        standard_amino_acids = [
            "GLY", "VAL", "ALA", "LEU", "ILE", "PRO", "MET", "PHE", "TRP",
            "SER", "THR", "TYR", "CYS", "ASN", "GLN", "ASP", "GLU", "LYS",
            "ARG", "HIS"
        ]
        for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
            if row["_chem_comp.id"] not in standard_amino_acids:
                continue
            comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
            assert comp_comp_id is not None
            assert comp_comp_id.chem_comp.group.strip() == "L-peptide"
            # Guard kept so that running with assertions disabled (-O)
            # still skips components that failed to load.
            if comp_comp_id is not None:
                print(comp_comp_id.chem_comp.id.strip(), end=' ')
                print(comp_comp_id.chem_comp.name.strip(), end=' ')
                print(comp_comp_id.chem_comp.group.strip())
                for tor in comp_comp_id.tor_list:
                    print("  tor:", tor.atom_id_1, tor.atom_id_2, end=' ')
                    print(tor.atom_id_3,
                          tor.atom_id_4,
                          tor.value_angle,
                          end=' ')
                    print(tor.value_angle_esd, tor.period)
                for chir in comp_comp_id.chir_list:
                    print("  chir:",
                          chir.atom_id_centre,
                          chir.atom_id_1,
                          end=' ')
                    print(chir.atom_id_2, chir.atom_id_3, chir.volume_sign)

    # Section 8: dump synonyms and van der Waals entries of the energy lib.
    if default_switch:
        elib = server.ener_lib()
        for syn in elib.lib_synonym.items():
            print(syn)
        for vdw in elib.lib_vdw:
            vdw.show()
    print("OK")
Exemple #3
0
def calculate_cell_content(xray_structure):
    """Sum occupancy * multiplicity per scattering type.

    Returns a ``dicts.with_default_value(0)`` mapping keyed by each
    scatterer's ``scattering_type``.
    """
    cell_content = dicts.with_default_value(0)
    for scatterer in xray_structure.scatterers():
        contribution = scatterer.occupancy * scatterer.multiplicity()
        cell_content[scatterer.scattering_type] += contribution
    return cell_content
def run():
  """Match pickled structures against a reference model and summarize.

  Command line: [OPTIONS] reference_structure.pickle structure.pickle
    --tolerance        float, default 3; forwarded to emma.model_matches.
    --match_hydrogens  bool, default True; when false, scatterers selected
                       by element_selection('H') are removed from all
                       structures before matching.

  Either pickle may hold a single structure or a list/tuple of them; only
  the first entry of a reference sequence is used.  For each structure the
  first refined match (if any) is shown, and a histogram of the number of
  matched pairs is printed at the end.  Exits with status 1 after printing
  the help text unless exactly two positional arguments are given.
  """
  command_line = (option_parser(
    usage="usage: cctbx.euclidean_model_matching [OPTIONS] "
          "reference_structure.pickle structure.pickle",
    description="")
    .option("--tolerance",
            type="float",
            default=3)
    .option("--match_hydrogens", type='bool', default=True)
  ).process(args=sys.argv[1:])
  if len(command_line.args) != 2:
    command_line.parser.print_help()
    sys.exit(1)
  # NOTE(review): easy_pickle.load presumably unpickles these files;
  # unpickling untrusted input can execute arbitrary code, so only pass
  # trusted files -- confirm against the easy_pickle implementation.
  reference_structure = easy_pickle.load(command_line.args[0])
  if isinstance(reference_structure, (list, tuple)):
    reference_structure = reference_structure[0]
  structures = easy_pickle.load(command_line.args[1])
  if not isinstance(structures, (list, tuple)):
    structures = [structures]

  if not command_line.options.match_hydrogens:
    reference_structure.select_inplace(
      ~reference_structure.element_selection('H'))
    for structure in structures:
      structure.select_inplace(~structure.element_selection('H'))
  # Single-argument print calls behave the same on Python 2 and 3;
  # print("") emits the blank line a bare py2 `print` produced.
  print("Reference model:")
  reference_structure.show_summary()
  print("")
  reference_model = reference_structure.as_emma_model()

  match_list = []
  match_histogram = dicts.with_default_value(0)
  for structure in structures:
    structure.show_summary()
    if hasattr(structure, "info"):
      print(structure.info)
    print("")
    sys.stdout.flush()
    refined_matches = emma.model_matches(
      reference_model,
      structure.as_emma_model(),
      tolerance=command_line.options.tolerance,
      models_are_diffraction_index_equivalent=False,
      break_if_match_with_no_singles=True).refined_matches
    if len(refined_matches):
      refined_matches[0].show()
      m = len(refined_matches[0].pairs)
    else:
      print("No matches")
      m = 0
    match_list.append(match_record(m, structure.scatterers().size()))
    match_histogram[m] += 1
    print("")
    sys.stdout.flush()
  print("match_list: %s" % (match_list,))
  # Largest number of matched pairs first.
  keys = sorted(match_histogram.keys(), reverse=True)
  print("matches: frequency")
  total = sum(match_histogram[key] for key in keys)
  cumulative = 0
  for key in keys:
    count = match_histogram[key]
    cumulative += count
    print("  %3d: %3d = %5.1f%%, %5.1f%%" % (
      key, count, 100.*count/total, 100.*cumulative/total))
  print("")
  sys.stdout.flush()
Exemple #5
0
def run():
    """Compare pickled structures with a reference model using emma.

    Positional arguments: reference_structure.pickle structure.pickle.
    Options: ``--tolerance`` (float, default 3, forwarded to
    ``emma.model_matches``) and ``--match_hydrogens`` (bool, default True;
    when false, scatterers selected by ``element_selection('H')`` are
    removed before matching).

    Each pickle may contain one structure or a list/tuple of structures;
    for the reference only the first entry is used.  The first refined
    match of every structure is shown and a histogram of matched-pair
    counts is printed last.  With a wrong number of positional arguments
    the help text is printed and the process exits with status 1.
    """
    command_line = (option_parser(
        usage="usage: cctbx.euclidean_model_matching [OPTIONS] "
        "reference_structure.pickle structure.pickle",
        description="").option("--tolerance", type="float",
                               default=3).option(
                                   "--match_hydrogens",
                                   type='bool',
                                   default=True)).process(args=sys.argv[1:])
    if len(command_line.args) != 2:
        command_line.parser.print_help()
        sys.exit(1)
    # NOTE(review): easy_pickle.load presumably unpickles these files;
    # unpickling untrusted input can execute arbitrary code -- confirm and
    # restrict to trusted files.
    reference_structure = easy_pickle.load(command_line.args[0])
    if isinstance(reference_structure, (list, tuple)):
        reference_structure = reference_structure[0]
    structures = easy_pickle.load(command_line.args[1])
    if not isinstance(structures, (list, tuple)):
        structures = [structures]

    if not command_line.options.match_hydrogens:
        reference_structure.select_inplace(
            ~reference_structure.element_selection('H'))
        for structure in structures:
            structure.select_inplace(~structure.element_selection('H'))
    # print("...") with one argument gives identical output on Python 2
    # and 3; print("") stands in for the bare py2 `print` statement.
    print("Reference model:")
    reference_structure.show_summary()
    print("")
    reference_model = reference_structure.as_emma_model()

    match_list = []
    match_histogram = dicts.with_default_value(0)
    for structure in structures:
        structure.show_summary()
        if hasattr(structure, "info"):
            print(structure.info)
        print("")
        sys.stdout.flush()
        refined_matches = emma.model_matches(
            reference_model,
            structure.as_emma_model(),
            tolerance=command_line.options.tolerance,
            models_are_diffraction_index_equivalent=False,
            break_if_match_with_no_singles=True).refined_matches
        if len(refined_matches):
            refined_matches[0].show()
            m = len(refined_matches[0].pairs)
        else:
            print("No matches")
            m = 0
        match_list.append(match_record(m, structure.scatterers().size()))
        match_histogram[m] += 1
        print("")
        sys.stdout.flush()
    print("match_list: %s" % (match_list,))
    _show_match_histogram(match_histogram)
    print("")
    sys.stdout.flush()


def _show_match_histogram(match_histogram):
    """Print count, percentage and cumulative percentage per match size."""
    print("matches: frequency")
    total = sum(match_histogram.values())
    cumulative = 0
    # Largest number of matched pairs first.
    for key in sorted(match_histogram.keys(), reverse=True):
        count = match_histogram[key]
        cumulative += count
        print("  %3d: %3d = %5.1f%%, %5.1f%%" % (
            key, count, 100. * count / total, 100. * cumulative / total))
Exemple #6
0
def exercise():
  verbose = "--verbose" in sys.argv[1:]
  quick = "--quick" in sys.argv[1:]
  list_cif = server.mon_lib_list_cif()
  srv = server.server(list_cif=list_cif)
  print "srv.root_path:", srv.root_path
  default_switch = "--default_off" not in sys.argv[1:]
  if (False or default_switch):
    monomers_with_commas = {}
    atom_id_counts = dicts.with_default_value(0)
    for row_id in list_cif.cif["comp_list"]["_chem_comp.id"]:
      if (quick and random.random() < 0.95): continue
      if (verbose): print "id:", row_id
      comp_comp_id = srv.get_comp_comp_id_direct(comp_id=row_id)
      if (comp_comp_id is None):
        print "Error instantiating comp_comp_id(%s)" % row_id
      else:
        has_primes = False
        has_commas = False
        for atom in comp_comp_id.atom_list:
          atom_id_counts[atom.atom_id] += 1
          if (atom.atom_id.find("'") >= 0):
            has_primes = True
          if (atom.atom_id.find(",") >= 0):
            has_commas = True
        if (has_commas):
          monomers_with_commas[comp_comp_id.chem_comp.id] = has_primes
    print monomers_with_commas
    atom_ids = flex.std_string(atom_id_counts.keys())
    counts = flex.size_t(atom_id_counts.values())
    perm = flex.sort_permutation(data=counts, reverse=True)
    atom_ids = atom_ids.select(perm)
    counts = counts.select(perm)
    for atom_id,count in zip(atom_ids, counts):
      print atom_id, count
  if (False or default_switch):
    for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
      if (quick and random.random() < 0.95): continue
      if (verbose): print "id:", row["_chem_comp.id"]
      comp_comp_id = srv.get_comp_comp_id_direct(comp_id=row["_chem_comp.id"])
      check_chem_comp(cif_types.chem_comp(**row), comp_comp_id)
    if ("--pickle" in sys.argv[1:]):
      easy_pickle.dump("mon_lib.pickle", srv)
  if (False or default_switch):
    comp = srv.get_comp_comp_id_direct("GLY")
    comp.show()
    mod = srv.mod_mod_id_dict["COO"]
    comp.apply_mod(mod).show()
  if (False or default_switch):
    comp = srv.get_comp_comp_id_direct("LYS")
    comp.show()
    mod = srv.mod_mod_id_dict["B2C"]
    comp.apply_mod(mod).show()
  if (False or default_switch):
    for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
      if (quick and random.random() < 0.95): continue
      comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
      if (comp_comp_id is not None):
        if (comp_comp_id.classification == "peptide"):
          print comp_comp_id.chem_comp.id, comp_comp_id.chem_comp.name,
          print row["_chem_comp.group"],
          grp = row["_chem_comp.group"].lower().strip()
          if (grp not in ("l-peptide", "d-peptide", "polymer")):
            print "LOOK",
            #if (not os.path.isdir("look")): os.makedirs("look")
            #open("look/%s.cif" % row["_chem_comp.id"], "w").write(
              #open(comp_comp_id.file_name).read())
          print
        elif (row["_chem_comp.group"].lower().find("peptide") >= 0
              or comp_comp_id.chem_comp.group.lower().find("peptide") >= 0):
          print comp_comp_id.chem_comp.id, comp_comp_id.chem_comp.name,
          print row["_chem_comp.group"], "MISMATCH"
        if (comp_comp_id.classification in ("RNA", "DNA")):
          print comp_comp_id.chem_comp.id, comp_comp_id.chem_comp.name,
          print row["_chem_comp.group"],
          if (comp_comp_id.classification != row["_chem_comp.group"].strip()):
            print comp_comp_id.classification, "MISMATCH",
          print
        elif (row["_chem_comp.group"].lower().find("NA") >= 0
              or comp_comp_id.chem_comp.group.lower().find("NA") >= 0):
          print comp_comp_id.chem_comp.id, comp_comp_id.chem_comp.name,
          print row["_chem_comp.group"], "MISMATCH"
  if (False or default_switch):
    for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
      if (quick and random.random() < 0.95): continue
      comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
      if (comp_comp_id is not None):
        planes = comp_comp_id.get_planes()
        for plane in planes:
          dist_esd_dict = {}
          for plane_atom in plane.plane_atoms:
            dist_esd_dict[str(plane_atom.dist_esd)] = 0
          if (len(dist_esd_dict) != 1 or dist_esd_dict.keys()[0] != "0.02"):
            print comp_comp_id.chem_comp.id, plane.plane_id,
            print dist_esd_dict.keys()
  if (False or default_switch):
    standard_amino_acids = [
      "GLY", "VAL", "ALA", "LEU", "ILE", "PRO", "MET", "PHE", "TRP", "SER",
      "THR", "TYR", "CYS", "ASN", "GLN", "ASP", "GLU", "LYS", "ARG", "HIS"]
    for row in list_cif.cif["comp_list"]["_chem_comp"].iterrows():
      is_standard_aa = row["_chem_comp.id"] in standard_amino_acids
      if (1 and not is_standard_aa):
        continue
      comp_comp_id = srv.get_comp_comp_id_direct(row["_chem_comp.id"])
      if (is_standard_aa):
        assert comp_comp_id is not None
        assert comp_comp_id.chem_comp.group.strip() == "L-peptide"
      if (comp_comp_id is not None):
        print comp_comp_id.chem_comp.id.strip(),
        print comp_comp_id.chem_comp.name.strip(),
        print comp_comp_id.chem_comp.group.strip()
        for tor in comp_comp_id.tor_list:
          print "  tor:", tor.atom_id_1, tor.atom_id_2,
          print tor.atom_id_3, tor.atom_id_4, tor.value_angle,
          print tor.value_angle_esd, tor.period
        for chir in comp_comp_id.chir_list:
          print "  chir:", chir.atom_id_centre, chir.atom_id_1,
          print chir.atom_id_2, chir.atom_id_3, chir.volume_sign
  if (False or default_switch):
    elib = server.ener_lib()
    if (False or default_switch):
      for syn in elib.lib_synonym.items():
        print syn
    if (False or default_switch):
      for vdw in elib.lib_vdw:
        vdw.show()
  print "OK"
def exercise(space_group_info,
             anomalous_flag,
             d_min=1.0,
             reflections_per_bin=200,
             n_bins=10,
             verbose=0):
    """Validate statistics.wilson_plot against known scale and B values.

    Builds a 20-atom N/C/C/O structure via random_structure.xray_structure
    with u_iso = adptbx.b_as_u(10), scales its |F_calc| by several factors
    k_given, and asserts that the Wilson scale, intensity scale, B value
    and E-statistics fall into fixed windows.  An all-zero data set is then
    submitted and must be rejected with a RuntimeError carrying the
    expected message.

    Parameters:
        space_group_info: forwarded to random_structure.xray_structure;
            also queried for centricity and (verbose only) the Hall symbol.
        anomalous_flag: forwarded to structure_factors().
        d_min: resolution limit forwarded to structure_factors().
        reflections_per_bin, n_bins: forwarded to f_calc.setup_binner().
        verbose: any true value enables diagnostic output.

    Raises:
        AssertionError: if any check fails, including the case where the
            all-zero data set does not trigger a RuntimeError.
    """
    elements = ("N", "C", "C", "O") * 5
    structure_factors = random_structure.xray_structure(
        space_group_info,
        elements=elements,
        volume_per_atom=50.,
        min_distance=1.5,
        general_positions_only=True,
        use_u_aniso=False,
        u_iso=adptbx.b_as_u(10)).structure_factors(
            anomalous_flag=anomalous_flag, d_min=d_min, algorithm="direct")
    if verbose:
        structure_factors.xray_structure().show_summary()
    # Element -> number of occurrences; handed to wilson_plot below.
    asu_contents = dicts.with_default_value(0)
    for elem in elements:
        asu_contents[elem] += 1
    f_calc = abs(structure_factors.f_calc())
    f_calc.setup_binner(auto_binning=True,
                        reflections_per_bin=reflections_per_bin,
                        n_bins=n_bins)
    if verbose:
        f_calc.binner().show_summary()
    for k_given in [1, 0.1, 0.01, 10, 100]:
        f_obs = miller.array(miller_set=f_calc, data=f_calc.data() *
                             k_given).set_observation_type_xray_amplitude()
        f_obs.use_binner_of(f_calc)
        wp = statistics.wilson_plot(f_obs, asu_contents, e_statistics=True)
        if verbose:
            # One formatted argument per call keeps the output identical
            # on Python 2 and Python 3.
            print("wilson_k, wilson_b: %s %s" % (wp.wilson_k, wp.wilson_b))
            print("space group: %s" % (
                space_group_info.group().type().hall_symbol(),))
            print("<E^2-1>: %s" % (wp.mean_e_sq_minus_1,))

        assert 0.8 < wp.wilson_k / k_given < 1.2
        # Intensity scale is compared with k_given squared, so the window
        # is the squared amplitude window (0.8**2, 1.2**2).
        assert 0.64 < wp.wilson_intensity_scale_factor / (k_given *
                                                          k_given) < 1.44
        assert 9 < wp.wilson_b < 11
        assert wp.xy_plot_info().fit_correlation == wp.fit_correlation
        if space_group_info.group().is_centric():
            assert 0.90 < wp.mean_e_sq_minus_1 < 1.16
            assert 3.15 < wp.percent_e_sq_gt_2 < 6.5
        else:
            assert 0.65 < wp.mean_e_sq_minus_1 < 0.90
            assert 1.0 < wp.percent_e_sq_gt_2 < 3.15
        assert wp.normalised_f_obs.size() == f_obs.size()
    # Error path: every amplitude is zero.
    f_obs = f_calc.array(data=flex.double(f_calc.indices().size(), 0))
    f_obs.use_binner_of(f_calc)
    n_bins = f_obs.binner().n_bins_used()
    try:
        statistics.wilson_plot(f_obs, asu_contents)
    except RuntimeError as e:
        assert not show_diff(
            str(e), """\
wilson_plot error: %d empty bins:
  Number of bins: %d
  Number of f_obs > 0: 0
  Number of f_obs <= 0: %d""" % (n_bins, n_bins, f_obs.indices().size()))
    else:
        # Without this branch the error-path check would pass vacuously.
        raise AssertionError(
            "statistics.wilson_plot accepted all-zero f_obs "
            "without RuntimeError")
def calculate_cell_content(xray_structure):
  """Accumulate occupancy * multiplicity for every scattering type.

  The totals are returned in a dicts.with_default_value(0) container keyed
  by scattering_type.
  """
  totals = dicts.with_default_value(0)
  for scatterer in xray_structure.scatterers():
    weight = scatterer.occupancy * scatterer.multiplicity()
    scattering_type = scatterer.scattering_type
    totals[scattering_type] = totals[scattering_type] + weight
  return totals