コード例 #1
0
def run(prefix):
  """
  Check the Yoink interaction graph built for 2lvr.pdb.

  The Yoink input files are written for the hierarchy read from
  data_files/2lvr.pdb, PYoink is asked for its interaction list, and that
  list is compared with the reference pairs below after both have been
  sorted.  ``prefix`` is accepted but not used.
  """
  yoink_root = os.path.join(qrefine,"plugin","yoink")
  yoink_dat = os.path.join(yoink_root,"dat")
  yoink_jar = os.path.join(yoink_root,"Yoink-0.0.1.jar")
  model_file = os.path.join(qr_unit_tests,"data_files","2lvr.pdb")

  hierarchy = iotbx.pdb.input(file_name=model_file).construct_hierarchy()
  yoink_utils.write_yoink_infiles("cluster.xml", "qmmm.xml", hierarchy,
                                  yoink_dat)
  interaction_list, _weight = PYoink(
    yoink_jar, yoink_dat, "cluster.xml").get_interactions_list()

  # Reference residue pairs.
  expected_list = [[24, 27], [19, 22], [5, 11], [18, 22], [25, 26], [23, 26], [2, 3], [5, 7], [9, 11], [27, 29],
                   [5, 10], [9, 10], [18, 21], [11, 12], [24, 25], [5, 12], [6, 10], [6, 7], [20, 21], [10, 11],
                   [21, 25], [5, 6], [21, 24], [17, 21], [6, 11], [12, 13], [5, 13], [17, 20], [4, 5], [3, 12],
                   [29, 30], [8, 9], [20, 24], [11, 13], [4, 13], [5, 19], [28, 29], [6, 9], [6, 19], [6, 8],
                   [13, 19], [7, 19], [6, 13], [13, 18], [15, 19], [7, 8], [4, 19], [16, 19], [6, 22], [15, 18],
                   [15, 16], [9, 31], [6, 31], [26, 27], [13, 14], [11, 22], [18, 19], [4, 15], [17, 18], [14, 15],
                   [16, 17], [22, 31], [8, 26], [4, 16], [8, 23], [15, 17], [6, 23], [7, 23], [13, 15], [22, 23],
                   [2, 4], [13, 22], [23, 24], [19, 23], [10, 12], [19, 20], [24, 30], [21, 22], [9, 26], [23, 27],
                   [4, 14], [1, 2], [16, 20], [26, 31], [25, 28], [27, 28], [22, 26], [24, 28], [20, 23], [17, 19],
                   [27, 30], [16, 18], [20, 22], [1, 3], [6, 26], [28, 30], [3, 13], [3, 5], [3, 4], [22, 25],
                   [3, 14], [9, 22]]

  # Put each pair, then each list, into sorted order so the comparison does
  # not depend on the order in which pairs or pair members are reported.
  for reference_pair, observed_pair in zip(expected_list, interaction_list):
    reference_pair.sort()
    observed_pair.sort()
  expected_list.sort()
  interaction_list.sort()
  assert approx_equal(expected_list, interaction_list)
コード例 #2
0
 def __init__(self,
              working_folder="ase",
              clustering_method=None,
              altloc_method=None,
              maxnum_residues_in_cluster=20,
              charge_embedding=False,
              two_buffers=False,
              pdb_hierarchy=None,
              qm_engine_name=None,
              crystal_symmetry=None,
              clustering=True,
              qm_run=True,
              cif_objects=None,
              debug=False,
              charge_cutoff=8.0,
              save_clusters=False):
     """
     Record the fragmentation settings and prepare the expanded model.

     Besides copying the arguments onto the instance, this creates the
     working folder when it does not exist, builds the charge service from
     the PDB records of ``pdb_hierarchy``, collects backbone connections
     and altloc atoms, builds the ``expand`` object (radius 10.0) and
     writes it to "expansion.pdb".  When ``clustering`` is true a PYoink
     instance is created, ``qm_run`` is stored and ``set_up_cluster_qm``
     is called.

     NOTE(review): ``pdb_hierarchy`` defaults to None but is dereferenced
     unconditionally, so callers have to supply it.
     """
     # Settings taken over unchanged from the arguments.
     self.working_folder = os.path.abspath(working_folder)
     self.clustering_method = clustering_method
     self.altloc_method = altloc_method
     self.maxnum_residues_in_cluster = maxnum_residues_in_cluster
     self.charge_embedding = charge_embedding
     self.two_buffers = two_buffers
     self.pdb_hierarchy = pdb_hierarchy
     self.qm_engine_name = qm_engine_name
     self.crystal_symmetry = crystal_symmetry
     self.debug = debug
     self.charge_cutoff = charge_cutoff
     self.save_clusters = save_clusters
     self.system_size = pdb_hierarchy.atoms_size()

     pdb_records = pdb_hierarchy.as_pdb_string(
         crystal_symmetry=crystal_symmetry)
     self.charge_service = charges_class(raw_records=pdb_records,
                                         ligand_cif_file_names=cif_objects)
     if not os.path.exists(self.working_folder):
         os.mkdir(self.working_folder)
     self.backbone_connections = fragment_utils.get_backbone_connections(
         self.pdb_hierarchy)
     self.get_altloc_molecular_indices()

     # Atoms whose fifth label column is not a blank are kept as altloc atoms.
     self.altloc_atoms = []
     for atom in list(pdb_hierarchy.atoms()):
         if atom.pdb_label_columns()[4] != " ":
             self.altloc_atoms.append(atom)

     self.expansion = expand(pdb_hierarchy=self.pdb_hierarchy,
                             crystal_symmetry=self.crystal_symmetry,
                             select_within_radius=10.0)
     self.pdb_hierarchy_super = self.expansion.ph_super_sphere
     # expansion.pdb is written as the reference for capping.
     self.expansion_file = "expansion.pdb"
     self.expansion.write_super_cell_selected_in_sphere(
         file_name=self.expansion_file)

     if not clustering:
         return
     yoink_root = os.path.join(qrefine, "plugin", "yoink")
     self.yoink_dat_path = os.path.join(yoink_root, "dat")
     self.pyoink = PYoink(os.path.join(yoink_root, "Yoink-0.0.1.jar"))
     self.qm_run = qm_run
     self.set_up_cluster_qm()
コード例 #3
0
ファイル: tst_07.py プロジェクト: qrefine/qrefine
def run(prefix):
  """
  Check interaction graph construction with both back ends.

  First the Java (Yoink) interaction list is compared, after sorting, with
  its reference pairs; then the list returned by ``pair_interaction.run``
  is compared as-is with its own reference.  ``prefix`` is accepted but
  not used.
  """
  # --- Java version (to be deprecated) ------------------------------------
  from qrefine.utils import yoink_utils
  from qrefine.plugin.yoink.pyoink import PYoink
  yoink_root = os.path.join(qrefine,"plugin","yoink")
  yoink_dat = os.path.join(yoink_root,"dat")
  hierarchy = get_hierarchy()
  yoink_utils.write_yoink_infiles("cluster.xml", "qmmm.xml", hierarchy,
                                  yoink_dat)
  java_runner = PYoink(os.path.join(yoink_root,"Yoink-0.0.1.jar"),
                       yoink_dat,
                       "cluster.xml")
  interaction_list_java, _weight = java_runner.get_interactions_list()

  expected_list_java = [[24, 27], [19, 22], [5, 11], [18, 22], [25, 26], [23, 26], [2, 3], [5, 7], [9, 11], [27, 29],
                 [5, 10], [9, 10], [18, 21], [11, 12], [24, 25], [5, 12], [6, 10], [6, 7], [20, 21], [10, 11],
                 [21, 25], [5, 6], [21, 24], [17, 21], [6, 11], [12, 13], [5, 13], [17, 20], [4, 5], [3, 12],
                 [29, 30], [8, 9], [20, 24], [11, 13], [4, 13], [5, 19], [28, 29], [6, 9], [6, 19], [6, 8],
                 [13, 19], [7, 19], [6, 13], [13, 18], [15, 19], [7, 8], [4, 19], [16, 19], [6, 22], [15, 18],
                 [15, 16], [9, 31], [6, 31], [26, 27], [13, 14], [11, 22], [18, 19], [4, 15], [17, 18], [14, 15],
                 [16, 17], [22, 31], [8, 26], [4, 16], [8, 23], [15, 17], [6, 23], [7, 23], [13, 15], [22, 23],
                 [2, 4], [13, 22], [23, 24], [19, 23], [10, 12], [19, 20], [24, 30], [21, 22], [9, 26], [23, 27],
                 [4, 14], [1, 2], [16, 20], [26, 31], [25, 28], [27, 28], [22, 26], [24, 28], [20, 23], [17, 19],
                 [27, 30], [16, 18], [20, 22], [1, 3], [6, 26], [28, 30], [3, 13], [3, 5], [3, 4], [22, 25],
                 [3, 14], [9, 22]]
  # Sort inside each pair, then sort the lists, before comparing.
  for reference_pair, observed_pair in zip(expected_list_java,
                                           interaction_list_java):
    reference_pair.sort()
    observed_pair.sort()
  expected_list_java.sort()
  interaction_list_java.sort()
  assert approx_equal(expected_list_java, interaction_list_java)

  # --- pair_interaction version -------------------------------------------
  # A fresh hierarchy is fetched; the result is compared in the order given.
  hierarchy = get_hierarchy()
  interaction_list_cpp = pair_interaction.run(hierarchy)
  expected_list_cpp = [
    (9, 26), (24, 30), (6, 9), (17, 20), (1, 3), (18, 19), (23, 26), (6, 7),
    (4, 19), (24, 27), (6, 10), (11, 22), (25, 26), (7, 19), (27, 28), (15, 18),
    (5, 11), (29, 30), (4, 16), (6, 23), (16, 19), (6, 26), (17, 18), (22, 25),
    (3, 12), (4, 15), (16, 18), (6, 13), (19, 23), (15, 16), (21, 24), (22, 23),
    (22, 26), (8, 9), (17, 21), (20, 21), (24, 28), (6, 11), (13, 19), (18, 21),
    (23, 24), (3, 5), (5, 10), (7, 8), (5, 7), (24, 25), (16, 20), (13, 22),
    (18, 22), (28, 29), (5, 13), (19, 22), (15, 19), (16, 17), (11, 12), (4, 13),
    (9, 11), (11, 13), (20, 22), (13, 15), (2, 3), (8, 26), (6, 8), (20, 24),
    (6, 31), (26, 31), (21, 22), (13, 18), (27, 30), (23, 27), (3, 4), (2, 4),
    (10, 11), (8, 23), (5, 6), (9, 22), (5, 19), (22, 31), (3, 14), (27, 29),
    (1, 2), (28, 30), (5, 12), (10, 12), (4, 5), (7, 10), (6, 22), (7, 23),
    (17, 19), (22, 24), (3, 13), (4, 14), (9, 10), (9, 31), (19, 20), (25, 28),
    (15, 17), (6, 19), (21, 25), (20, 23), (26, 27), (13, 14), (12, 13), (14, 15)]
  assert approx_equal(expected_list_cpp, interaction_list_cpp)
コード例 #4
0
def clusters(pdb_hierarchy):
    """
    Cluster the residues of ``pdb_hierarchy`` from its Yoink interactions.

    Writes "cluster.xml" and "qmmm.xml" for the hierarchy, obtains the
    interaction list from PYoink, sorts each pair and then the whole list,
    and runs betweenness-centrality clustering with at most 3 residues per
    cluster.  The clusters are printed and returned.
    """
    yoink_root = os.path.join(qrefine, "plugin", "yoink")
    yoink_dat = os.path.join(yoink_root, "dat")
    yoink_utils.write_yoink_infiles("cluster.xml", "qmmm.xml", pdb_hierarchy,
                                    yoink_dat)
    pyoink = PYoink(os.path.join(yoink_root, "Yoink-0.0.1.jar"), yoink_dat,
                    "cluster.xml")
    interaction_list, _weight = pyoink.get_interactions_list()
    for pair in interaction_list:
        pair.sort()
    interaction_list.sort()
    cc = clustering.betweenness_centrality_clustering(
        interaction_list, maxnum_residues_in_cluster=3)
    # Fetch the clusters once so that what is printed is exactly what is
    # returned.  print(...) with one argument behaves the same on Python 2
    # and 3, whereas the former print statement does not parse on Python 3.
    result = cc.get_clusters()
    print(result)
    return result
コード例 #5
0
def run(prefix):
    """
    Exercise the buffer region of clusters.

    Writes the Yoink input files for data_files/2lvr.pdb and passes the
    ``bc_*`` reference clusters and QM regions, together with a PYoink
    instance configured for "qmmm.xml", to ``check_buffer``.  ``prefix``
    is accepted but not used.
    """
    yoink_root = os.path.join(qrefine, "plugin", "yoink")
    yoink_dat = os.path.join(yoink_root, "dat")
    model_file = os.path.join(qr_unit_tests, "data_files", "2lvr.pdb")

    ph = iotbx.pdb.input(file_name=model_file).construct_hierarchy()
    yoink_utils.write_yoink_infiles("cluster.xml", "qmmm.xml", ph, yoink_dat)

    # The gn_* tables are defined for reference but are not passed to
    # check_buffer below.
    gn_clusters = [
        [1, 2],
        [3, 4, 5, 14, 13],
        [6, 7, 8, 9, 22, 23, 26, 31],
        [10, 11, 12],
        [15, 16, 17, 18, 19, 20, 21],
        [24, 25, 27, 28, 29, 30],
    ]
    gn_qms = [
        [1, 2, 3, 4],
        [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 18, 19, 22],
        [5, 6, 7, 8, 9, 10, 11, 13, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
         31],
        [3, 5, 6, 9, 10, 11, 12, 13, 22],
        [4, 5, 6, 7, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25],
        [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
    ]

    # Reference clusters and QM regions that are actually checked.
    bc_clusters = [
        [1, 2],
        [3, 4, 5, 13, 14, 15, 16, 17, 18, 19, 20, 21],
        [6, 7, 8, 9, 10, 11, 12, 22, 23, 26, 31],
        [24, 25, 27, 28, 29, 30],
    ]
    bc_qms = [
        [1, 2, 3, 4],
        [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
         22, 23, 24, 25],
        [3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23, 24, 25, 26,
         27, 31],
        [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
    ]

    pyoink = PYoink(os.path.join(yoink_root, "Yoink-0.0.1.jar"), yoink_dat,
                    "qmmm.xml")
    check_buffer(bc_clusters, bc_qms, pyoink)
コード例 #6
0
class fragments(object):
    def __init__(self,
                 working_folder="ase",
                 clustering_method=None,
                 altloc_method=None,
                 maxnum_residues_in_cluster=20,
                 charge_embedding=False,
                 two_buffers=False,
                 pdb_hierarchy=None,
                 qm_engine_name=None,
                 crystal_symmetry=None,
                 clustering=True,
                 qm_run=True,
                 cif_objects=None,
                 debug=False,
                 charge_cutoff=8.0,
                 save_clusters=False,
                 fast_interaction=False,
                 select_within_radius=10.0,
                 bond_with_altloc_flag=True):
        """
        Record the fragmentation settings and build the initial fragments.

        The arguments are copied onto the instance; the working folder is
        created when missing; a charge service is built from the PDB
        records of ``pdb_hierarchy``; backbone connections and altloc atoms
        are collected; the ``expand`` object is built with
        ``select_within_radius`` and written to "expansion.pdb".  Unless
        ``fast_interaction`` is set, a PYoink instance is created.
        Finally ``set_up_cluster_qm`` is called.

        NOTE(review): ``pdb_hierarchy`` defaults to None but is
        dereferenced unconditionally, so callers have to supply it.
        """
        # Settings taken over unchanged from the arguments.
        self.working_folder = os.path.abspath(working_folder)
        self.clustering_method = clustering_method
        self.altloc_method = altloc_method
        self.maxnum_residues_in_cluster = maxnum_residues_in_cluster
        self.charge_embedding = charge_embedding
        self.two_buffers = two_buffers
        self.pdb_hierarchy = pdb_hierarchy
        self.qm_engine_name = qm_engine_name
        self.crystal_symmetry = crystal_symmetry
        self.clustering = clustering
        self.debug = debug
        self.charge_cutoff = charge_cutoff
        self.save_clusters = save_clusters
        self.fast_interaction = fast_interaction
        self.select_within_radius = select_within_radius
        self.bond_with_altloc_flag = bond_with_altloc_flag
        self.system_size = pdb_hierarchy.atoms_size()

        pdb_records = pdb_hierarchy.as_pdb_string(
            crystal_symmetry=crystal_symmetry)
        self.charge_service = charges_class(raw_records=pdb_records,
                                            ligand_cif_file_names=cif_objects)
        if not os.path.exists(self.working_folder):
            os.mkdir(self.working_folder)
        self.backbone_connections = fragment_utils.get_backbone_connections(
            self.pdb_hierarchy)
        self.get_altloc_molecular_indices()

        # Atoms whose fifth label column is not a blank are kept as altloc
        # atoms.
        self.altloc_atoms = []
        for atom in list(pdb_hierarchy.atoms()):
            if atom.pdb_label_columns()[4] != " ":
                self.altloc_atoms.append(atom)

        self.expansion = expand(pdb_hierarchy=self.pdb_hierarchy,
                                crystal_symmetry=self.crystal_symmetry,
                                select_within_radius=self.select_within_radius)
        self.pdb_hierarchy_super = self.expansion.ph_super_sphere
        # expansion.pdb is written as the reference for capping.
        self.expansion_file = "expansion.pdb"
        self.expansion.write_super_cell_selected_in_sphere(
            file_name=self.expansion_file)

        if not self.fast_interaction:
            # The Yoink (Java) back end is only loaded when it is needed.
            from qrefine.plugin.yoink.pyoink import PYoink
            from qrefine.utils.yoink_utils import write_yoink_infiles
            yoink_root = os.path.join(qrefine, "plugin", "yoink")
            self.yoink_dat_path = os.path.join(yoink_root, "dat")
            self.pyoink = PYoink(os.path.join(yoink_root, "Yoink-0.0.1.jar"))

        self.qm_run = qm_run
        self.set_up_cluster_qm()

    def update_xyz(self, sites_cart):
        self.pdb_hierarchy.atoms().set_xyz(sites_cart)
        pre_atoms = [
            atom.pdb_label_columns()
            for atom in self.pdb_hierarchy_super.atoms()
        ]
        self.expansion = self.expansion.update_xyz(sites_cart=sites_cart)
        self.pdb_hierarchy_super = self.expansion.ph_super_sphere
        new_atoms = [
            atom.pdb_label_columns()
            for atom in self.pdb_hierarchy_super.atoms()
        ]
        #if(self.expansion.ph_super_sphere.atoms_size()!=pre_size):
        if (pre_atoms != new_atoms):
            if (self.debug):
                print(
                    "the content of the super sphere has been changed,reset up fragments"
                )
            #Note: the atom size of self.expansion.ph_super_sphere gets changeed,
            #while the atom size in super_sphere_geometry_restraints_manager
            #does not get changed. Re-generate the object of expand
            if (1):
                self.expansion = expand(
                    pdb_hierarchy=self.pdb_hierarchy,
                    crystal_symmetry=self.crystal_symmetry,
                    select_within_radius=self.select_within_radius)
                self.pdb_hierarchy_super = self.expansion.ph_super_sphere
            self.get_fragments()
            self.get_fragment_hierarchies_and_charges()

    def get_altloc_molecular_indices(self):
        self.altloc_molecular_indices = []
        index = 0
        for chain in self.pdb_hierarchy.chains():
            for residue_group in chain.residue_groups():
                index += 1
                if (residue_group.have_conformers()):
                    self.altloc_molecular_indices.append(index)

    def set_up_cluster_qm(self, sites_cart=None):
        if (sites_cart is not None):
            ## update the selection of expansion_sphere, and its
            ## geometry_restraints_manager and pdb_hierarchy
            self.pdb_hierarchy_super = self.expansion.update(
                sites_cart=sites_cart).ph_super_sphere
        ###get clusters and their buffer regions using yoink and graph clustering.
        try:
            pre_clusters = self.clusters
        except:
            pre_clusters = None
        self.get_clusters()
        if (self.qm_run is True and pre_clusters != self.clusters):
            self.get_fragments()
            self.get_fragment_hierarchies_and_charges()

    def get_clusters(self):
        #print(self.clustering)
        n_residues = len(list(self.pdb_hierarchy.residue_groups()))
        if (not self.clustering):
            return (range(1, n_residues + 1, 1))
        if (self.fast_interaction):
            self.interaction_list = pair_interaction.run(
                copy.deepcopy(self.pdb_hierarchy))  ##deepcopy
        else:  # to be deprecated.
            self.cluster_file_name = self.working_folder + "/cluster.xml"
            self.qmmm_file_name = self.working_folder + "/qmmm.xml"
            ##  write yoink input file to get interactions
            if (not self.fast_interaction):
                from qrefine.utils.yoink_utils import write_yoink_infiles
            write_yoink_infiles(self.cluster_file_name, self.qmmm_file_name,
                                self.pdb_hierarchy, self.yoink_dat_path)
            self.pyoink.input_file = self.cluster_file_name
            self.pyoink.update()
            self.interaction_list, weight = self.pyoink.get_interactions_list()

        self.interacting_pairs = len(self.interaction_list)
        self.interaction_list += self.backbone_connections
        ## isolate altloc molecules
        new_interaction_list = []
        if (0):
            for item in self.interaction_list:
                contain_altlocs = set(
                    self.altloc_molecular_indices) & set(item)
                if (len(contain_altlocs) == 0):
                    new_interaction_list.append(item)
            self.interaction_list = new_interaction_list
        from . import clustering
        # t0 = time.time()
        self.clustering = clustering.betweenness_centrality_clustering(
            self.interaction_list,
            size=n_residues,
            maxnum_residues_in_cluster=self.maxnum_residues_in_cluster)
        clusters = self.clustering.get_clusters()
        # self.clusters=sorted(clusters, key=len, reverse=True)
        self.clusters = sorted(
            clusters,
            key=cmp_to_key(lambda x, y: 1 if len(x) < len(y) else -1
                           if len(x) > len(y) else 0))
        # self.clusters = sorted(clusters,
        #   lambda x, y: 1 if len(x) < len(y) else -1 if len(x) > len(y) else 0)
        # print "time taken for clustering", (time.time() - t0)

    def get_fragments(self):
        """
        Derive cluster and fragment atom lists for the current clusters.

        For every hierarchy considered -- the super-sphere hierarchy
        itself, or one sub-hierarchy per alternate location when more than
        one altloc index is present -- each cluster in ``self.clusters`` is
        turned into the atoms of the cluster and the atoms of its fragment
        (cluster plus buffer).  These come from ``pair_interaction.run``
        when ``fast_interaction`` is set, otherwise from PYoink.  With
        ``two_buffers`` set, the fragment atoms are recomputed from the
        molecules of the first fragment.

        Results are stored in ``self.cluster_atoms``,
        ``self.fragment_super_atoms`` and ``self.fragment_scales``.  The
        entries of the first hierarchy always come first, one per cluster.
        Further entries for the other altloc hierarchies are added
        according to ``self.altloc_method`` ("subtract" or "average").
        """
        from collections import Counter

        def selected_atom_indices_in_entire_ph(selected_atom_indices_in_sub_ph,
                                               sub_ph):
            # Map 1-based positions within sub_ph onto the serial numbers
            # its atoms carry (serials were reset on the super hierarchy
            # below, before any sub-hierarchy is selected).
            wanted = set(selected_atom_indices_in_sub_ph)
            return [
                int(number)
                for index, number in enumerate(
                    sub_ph.atoms().extract_serial())
                if index + 1 in wanted
            ]

        self.pdb_hierarchy_super.atoms_reset_serial()
        phs = [self.pdb_hierarchy_super]
        altloc_size = self.pdb_hierarchy_super.altloc_indices().size()
        if (altloc_size > 1):
            ## generate pdb_hierarchy for each altloc case
            phs = []
            asc = self.pdb_hierarchy_super.atom_selection_cache()
            if (self.debug):
                self.pdb_hierarchy_super.write_pdb_file(file_name="super.pdb")
            ## the first one altloc is " ", A B.. altlocs start from 1
            ## sorted() works whether keys() yields a list or a view
            altlocs = sorted(self.pdb_hierarchy_super.altloc_indices().keys())
            for altloc in altlocs:
                if (altloc == ""): continue
                sel = asc.selection("altloc '%s' or altloc '' or altloc ' '" %
                                    altloc)
                ph_altloc = self.pdb_hierarchy_super.select(sel)
                phs.append(ph_altloc)
                if (self.debug):
                    ph_altloc.write_pdb_file(file_name="super-" + str(altloc) +
                                             ".pdb")
        cluster_atoms_in_phs = []
        fragment_super_atoms_in_phs = []
        clusters = self.clusters  ##from graph clustering, molecular indices
        ##loop over each cluster in every pdb_hierarchy to define buffer region
        ##fragment consists of cluster and buffer
        ##all pdb_hierarchies have the same clusters at molecular level
        for ph in phs:
            cluster_atoms_in_ph = []
            fragment_super_atoms_in_ph = []
            molecules_in_fragments = []
            ## write yoink input file to get fragment
            if (not self.fast_interaction):
                pyoink = self.pyoink
                from qrefine.utils.yoink_utils import write_yoink_infiles
                write_yoink_infiles(self.cluster_file_name,
                                    self.qmmm_file_name, ph,
                                    self.yoink_dat_path)
            for cluster in clusters:
                if (self.fast_interaction):
                    atoms_in_one_cluster, atoms_in_one_fragment, molecules_in_one_fragment = \
                      pair_interaction.run(copy.deepcopy(ph), cluster)  ##deepcopy
                else:
                    pyoink.input_file = self.qmmm_file_name
                    pyoink.update(cluster)
                    atoms_in_one_cluster = pyoink.qm_core_fixed_indices
                    atoms_in_one_fragment, molecules_in_one_fragment = \
                      pyoink.get_qm_indices()

                atoms_in_one_cluster = selected_atom_indices_in_entire_ph(
                    atoms_in_one_cluster, ph)
                cluster_atoms_in_ph.append(atoms_in_one_cluster)

                atoms_in_one_fragment = selected_atom_indices_in_entire_ph(
                    atoms_in_one_fragment, ph)
                fragment_super_atoms_in_ph.append(atoms_in_one_fragment)
                molecules_in_fragments.append(molecules_in_one_fragment)

                atoms = self.pdb_hierarchy_super.atoms()
                check_selection_integrity(atoms, atoms_in_one_cluster)
            if (self.two_buffers):  ## define a second buffer layer
                fragment_super_atoms_in_ph = []
                for molecules in molecules_in_fragments:
                    if (self.fast_interaction):
                        _, atoms_in_one_fragment, _ = pair_interaction.run(
                            copy.deepcopy(ph), molecules)
                    else:
                        pyoink.input_file = self.qmmm_file_name
                        pyoink.update(list(molecules))
                        atoms_in_one_fragment, _ = pyoink.get_qm_indices()
                    atoms_in_one_fragment = selected_atom_indices_in_entire_ph(
                        atoms_in_one_fragment, ph)
                    fragment_super_atoms_in_ph.append(atoms_in_one_fragment)
            cluster_atoms_in_phs.append(cluster_atoms_in_ph)
            fragment_super_atoms_in_phs.append(fragment_super_atoms_in_ph)
        #
        ##always collect the clustering result from phs[0]
        self.cluster_atoms = []
        self.fragment_super_atoms = []
        self.fragment_scales = []
        for i_cluster in range(len(clusters)):
            self.collect_cluster_and_fragment(cluster_atoms_in_phs,
                                              fragment_super_atoms_in_phs,
                                              i_cluster, 0)
        ##check alternative locations and get all clusters and fragments
        overlap_clusters = {}
        overlap_fragments_super = {}
        if (len(phs) > 1):
            for i_cluster in range(len(clusters)):
                for j_ph in range(1, len(phs)):
                    fragment_same = (set(
                        fragment_super_atoms_in_phs[0][i_cluster]) == set(
                            fragment_super_atoms_in_phs[j_ph][i_cluster]))
                    # two same fragments for same non-altloc clusters
                    if (fragment_same): continue
                    # check the overlap
                    overlap_atoms_in_one_cluster = self.atoms_overlap(
                        cluster_atoms_in_phs, i_cluster, j_ph)
                    empty_overlap_cluster = (
                        len(overlap_atoms_in_one_cluster) == 0)
                    # subtract the contribution from overlap
                    if (self.altloc_method == "subtract"):
                        self.collect_cluster_and_fragment(
                            cluster_atoms_in_phs, fragment_super_atoms_in_phs,
                            i_cluster, j_ph)
                        # different fragments for different altloc clusters
                        if (empty_overlap_cluster): continue
                        # two same non-altloc clusters, the overlap is a cluster
                        # two different altloc clusters, the overlap is part of
                        # a residue, even an atom
                        # the cluster overlap will cause troubles for QM
                        # calculation, especially when it is an atom
                        atoms = self.pdb_hierarchy_super.atoms()
                        overlap_atoms_in_one_fragment = self.atoms_overlap(
                            fragment_super_atoms_in_phs, i_cluster, j_ph)
                        check_selection_integrity(
                            atoms, overlap_atoms_in_one_fragment)
                        self.cluster_atoms.append(
                            list(overlap_atoms_in_one_cluster))
                        self.fragment_super_atoms.append(
                            list(overlap_atoms_in_one_fragment))
                        # the overlap enters with a negative weight
                        scale_list = [-1.0] * sum(
                            i <= self.system_size
                            for i in overlap_atoms_in_one_fragment)
                        self.fragment_scales.append(scale_list)
                    ## average the contributions from overlap
                    elif (self.altloc_method == "average"):
                        # different fragments for different altloc clusters
                        if (empty_overlap_cluster):
                            self.collect_cluster_and_fragment(
                                cluster_atoms_in_phs,
                                fragment_super_atoms_in_phs, i_cluster, j_ph)
                        else:
                            # two same non-altloc clusters, the overlap is a
                            # cluster; two different altloc clusters, the
                            # overlap is part of a residue, even an atom.
                            # Collect all overlap clusters and fragments.
                            # list.append returns None, so its result must
                            # not be stored back into the dict; setdefault
                            # creates the list on first use and appends to
                            # it for every further altloc hierarchy.
                            overlap_clusters.setdefault(
                                i_cluster, []).append(
                                    cluster_atoms_in_phs[j_ph][i_cluster])
                            overlap_fragments_super.setdefault(
                                i_cluster, []).append(
                                    fragment_super_atoms_in_phs[j_ph][i_cluster])
        overlap_atoms = []
        for i_cluster, overlap_cluster in overlap_clusters.items():
            overlap_atoms = overlap_atoms + list(
                itertools.chain.from_iterable(overlap_cluster +
                                              [self.cluster_atoms[i_cluster]])
            )  #[atom_index, atom_index]
        # {atom_index: number of clusters the atom occurs in}
        frequency_overlap_atoms = Counter(overlap_atoms)
        for i_cluster, altloc_clusters in overlap_clusters.items():
            ## reset the fragment scale for the ith fragment in ph[0]
            for index, atom in enumerate([
                    i for i in self.fragment_super_atoms[i_cluster]
                    if i <= self.system_size
            ]):
                if (atom in self.cluster_atoms[i_cluster]
                        and atom in frequency_overlap_atoms
                        and not self.bond_with_altloc(
                            atom, self.bond_with_altloc_flag)):
                    self.fragment_scales[i_cluster][index] = \
                      1.0/frequency_overlap_atoms[atom]
            ## add overlap clusters and fragments
            for index, fragment_super in enumerate(
                    overlap_fragments_super[i_cluster]):
                scale_list = []
                for atom in [
                        i for i in fragment_super if i <= self.system_size
                ]:
                    if (atom in altloc_clusters[index]
                            and atom in frequency_overlap_atoms
                            and not self.bond_with_altloc(
                                atom, self.bond_with_altloc_flag)):
                        scale_list.append(1.0 / frequency_overlap_atoms[atom])
                    else:
                        scale_list.append(1.0)
                self.cluster_atoms.append(altloc_clusters[index])
                self.fragment_super_atoms.append(fragment_super)
                self.fragment_scales.append(scale_list)

    def bond_with_altloc(self, atom_index, bond_with_altloc_flag):
        if (not bond_with_altloc_flag): return False
        ph_atoms = list(self.pdb_hierarchy.atoms())
        ph_atom = ph_atoms[atom_index - 1]
        bond = False
        for altloc_atom in self.altloc_atoms:
            distance = ph_atom.distance(altloc_atom)
            if (distance < 1.7):
                bond = True
                break
            ##TODO
            ##check bond, better from bond topology
        return bond

    def atoms_overlap(self, cluster_atoms_in_phs, i_cluster, j_ph):
        """
        Return the set of atoms that cluster ``i_cluster`` has in common
        between hierarchy 0 and hierarchy ``j_ph``.
        """
        reference_atoms = set(cluster_atoms_in_phs[0][i_cluster])
        return reference_atoms.intersection(
            cluster_atoms_in_phs[j_ph][i_cluster])

    def collect_cluster_and_fragment(self, cluster_atoms_in_phs,
                                     fragment_super_atoms_in_phs, i_cluster,
                                     j_ph):
        self.cluster_atoms.append(cluster_atoms_in_phs[j_ph][i_cluster])
        self.fragment_super_atoms.append(
            fragment_super_atoms_in_phs[j_ph][i_cluster])
        scale_list = [1.0] * sum(
            i <= self.system_size
            for i in fragment_super_atoms_in_phs[j_ph][i_cluster])
        self.fragment_scales.append(scale_list)

    def get_fragment_hierarchies_and_charges(self):
        """
        Build selections, capped hierarchies and charges for each fragment.

        For every entry of ``self.fragment_super_atoms`` this stores:
        the boolean selection of the fragment within ``self.pdb_hierarchy``
        and within ``self.pdb_hierarchy_super``, the hierarchy returned by
        ``completion.run`` for the fragment, the total charge read back
        from a temporary "<i>_capping_tmp.pdb" file, and the cluster and
        buffer selections.  All result lists are reset at the start.
        """
        def pdb_hierarchy_select(atoms_size, selection):
            # Boolean array of length atoms_size that is True at position
            # (item - 1) for every 1-based item not larger than atoms_size.
            selection_array = flex.bool(atoms_size, False)
            for item in selection:
                if (item <= atoms_size):
                    selection_array[item - 1] = True
            return selection_array

        self.fragment_selections = []
        self.fragment_super_selections = []
        self.fragment_charges = []
        self.cluster_selections = []
        self.buffer_selections = []
        # (repeated reset; cluster_selections was already emptied above)
        self.cluster_selections = []
        self.fragment_capped_initial = []
        for i in range(len(self.fragment_super_atoms)):
            # Fragment atoms restricted to the original hierarchy: indices
            # beyond its atom count are dropped by pdb_hierarchy_select.
            fragment_selection = pdb_hierarchy_select(
                self.pdb_hierarchy.atoms_size(), self.fragment_super_atoms[i])
            ## QM part is fragment_super
            fragment_super_selection = pdb_hierarchy_select(
                self.pdb_hierarchy_super.atoms_size(),
                self.fragment_super_atoms[i])
            fragment_super_hierarchy = self.pdb_hierarchy_super.select(
                fragment_super_selection)
            if (self.debug):
                # Debug dumps of the uncapped fragment, without and with the
                # crystal symmetry of the expansion box.
                fragment_super_hierarchy.write_pdb_file(file_name=str(i) +
                                                        "-origin-cs.pdb")
                fragment_super_hierarchy.write_pdb_file(
                    file_name=str(i) + ".pdb",
                    crystal_symmetry=self.expansion.cs_box)
            # completion.run is given expansion.pdb as original_pdb_filename;
            # its result is kept as the initial capped fragment.
            charge_hierarchy = completion.run(
                pdb_hierarchy=fragment_super_hierarchy,
                crystal_symmetry=self.expansion.cs_box,
                model_completion=False,
                original_pdb_filename=self.expansion_file)
            self.fragment_capped_initial.append(charge_hierarchy)
            # NOTE(review): raw_records is not used again in the visible code.
            raw_records = charge_hierarchy.as_pdb_string(
                crystal_symmetry=self.expansion.cs_box)
            if (1):
                # Temporary file from which the charge is read back below.
                charge_hierarchy.write_pdb_file(
                    file_name=str(i) + "_capping_tmp.pdb",
                    crystal_symmetry=self.expansion.cs_box)

            self.charge_service.update_pdb_hierarchy(
                charge_hierarchy,
                self.expansion.cs_box,
            )
            # TODO: self.charge_service does not give the right charge here
            # (reason unknown), so a fresh charges_class instance is built
            # from the temporary PDB file instead.
            #charge = self.charge_service.get_total_charge()
            charge = charges_class(pdb_filename=str(i) +
                                   "_capping_tmp.pdb").get_total_charge()
            # The capping pdb file causes problems for tests, so remove it.
            os.remove(str(i) + "_capping_tmp.pdb")
            self.fragment_super_selections.append(fragment_super_selection)
            #
            self.fragment_selections.append(fragment_selection)
            self.fragment_charges.append(charge)
            cluster_selection = pdb_hierarchy_select(
                self.pdb_hierarchy.atoms_size(), self.cluster_atoms[i])
            self.cluster_selections.append(cluster_selection)
            # Positions where fragment and cluster selections agree are
            # switched off in a copy of the fragment selection.
            # NOTE(review): presumably `==` is element-wise for flex.bool,
            # leaving the fragment atoms that are not cluster atoms -- confirm.
            s = fragment_selection == cluster_selection
            buffer_selection = fragment_selection.deep_copy().set_selected(
                s, False)
            self.buffer_selections.append(buffer_selection)
            # if(self.debug):
            #   fragment_super_hierarchy.write_pdb_file(file_name=str(i)+"_frag.pdb",
            #     crystal_symmetry=self.expansion.cs_box)
            #   cluster_pdb_hierarchy = self.pdb_hierarchy.select(cluster_selection)
            #   cluster_pdb_hierarchy.write_pdb_file(file_name=str(i)+"_cluster.pdb",
            #     crystal_symmetry=self.expansion.cs_box)
            check_hierarchy(fragment_super_hierarchy)