def test_hash_ecfp_pair(self):
    """Check that hashing a pair of strings yields an integer in [0, 2**power)."""
    samples_per_power = 10
    for power in (2, 16, 64):
        # Exclusive upper bound every hash for this power must respect.
        upper_bound = 2**power
        for _ in range(samples_per_power):
            # Tuple elements are evaluated left to right, so the two random
            # strings are drawn in the same order as separate assignments.
            ecfp_pair = (random_string(10), random_string(10))
            hashed = rgf.hash_ecfp_pair(ecfp_pair, power)
            self.assertIsInstance(hashed, integer_types)
            self.assertLess(hashed, upper_bound)
            self.assertGreaterEqual(hashed, 0)
def _hash_fragment_pair(prop_pair, hash_type, idf_power):
    """Hash the property lists of two paired fragments into one identifier.

    Parameters:
        prop_pair - a pair of property lists, one per fragment
        hash_type - 'str': join each list with commas and hash the two
            strings with hash_ecfp_pair; 'vec': hash the pair of tuples
            with the builtin hash
        idf_power - forwarded to hash_ecfp_pair as `power` ('str' only)

    Returns the identifier, or None (after printing 'Wrong hash type!!!')
    when hash_type is neither 'str' nor 'vec'.
    """
    if hash_type == 'str':
        tobehashed = (','.join(prop_pair[0]), ','.join(prop_pair[1]))
        return hash_ecfp_pair(ecfp_pair=tobehashed, power=idf_power)
    if hash_type == 'vec':
        return hash((tuple(prop_pair[0]), tuple(prop_pair[1])))
    print('Wrong hash type!!!')
    return None


def _fragment_around_atom(mol, sa_dict, center, radius, heavy_atoms,
                          base_prop, hash_type):
    """Collect the bond environment and property list of one fragment.

    Parameters:
        mol - molecule the fragment is taken from
        sa_dict - solid-angle source handed to get_atom_proplist
        center - index of the central atom
        radius - environment radius passed to FindAtomEnvironmentOfRadiusN
        heavy_atoms - when truthy, hydrogens are excluded from the environment
        base_prop, hash_type - forwarded to get_atom_proplist

    Returns a tuple (sorted bond indices, property list, center).
    """
    env = list(
        Chem.FindAtomEnvironmentOfRadiusN(
            mol, radius, center, useHs=not heavy_atoms))
    env.sort()
    bonds = [mol.GetBondWithIdx(bid) for bid in env]
    # Begin indices first, then end indices: the set is filled in the same
    # order as before so that its iteration order cannot change.
    env_aids = set([bond.GetBeginAtomIdx() for bond in bonds] +
                   [bond.GetEndAtomIdx() for bond in bonds])
    if len(env_aids) == 0:
        # No bonds in the environment: the fragment is the central atom alone.
        env_aids = set([center])
    props = get_atom_proplist(
        mol=mol,
        sa_dict=sa_dict,
        aids=env_aids,
        base_prop=base_prop,
        hash_type=hash_type)
    # NOTE: the fragment SMILES used to be appended to the property list;
    # that step is disabled, so only the averaged properties are hashed.
    return (env, props, center)


def getECFPstringsRadiusN_avg_ifp(mols_info,
                                  heavy_atoms=0,
                                  base_prop=None,
                                  sa_dicts=None,
                                  contacts=None,
                                  ifptype='splif',
                                  degrees=None,
                                  parameters=None,
                                  hash_type='str',
                                  idf_power=64):
    """Obtain pairs of molecular fragments of contacting molecules outward to
    given degree, using the splif or plec procedure. For each fragment in a
    pair, compute its atomic property list and hash the pair to an integer.

    Parameters:
        mols_info - a list of two tuples each describing a molecule
            (coordinates, rdkit.Chem.rdchem.Mol molecule, weights);
            it represents a pair of contacting molecules
            (e.g. protein and ligand)
        heavy_atoms - when truthy, pairs involving a hydrogen are discarded
            and hydrogens are left out of the atom environments
        base_prop - list of property names handed to get_atom_proplist
            (default ['AtomicMass']); solid angles are only prepared when
            some name contains 'SolidAngle'
        sa_dicts - a list of two dictionaries each mapping atom indices to
            their solid angles in a molecule (default [{}, {}]); an empty
            entry is replaced by a concave hull built from mols_info
        contacts - a list of two index lists each indicating the queried
            atoms in a molecule (default [[], []]); the lists are paired
            element by element
        ifptype - either 'splif' or 'plec'
        degrees - ecfp radii (default [1, 1]); 'splif' uses them as a single
            radius pair, 'plec' expands them with plec_pairing
        parameters - a list of parameter dictionaries, each for calculating
            the solid angles of surface atoms of a molecule; the keys
            'alpha' and 'alpha_step' are read here (default
            {'weighted': 0, 'alpha': -1, 'alpha_step': 0.1} for both)
        hash_type - 'str' or 'vec', see _hash_fragment_pair
        idf_power - power handed to hash_ecfp_pair when hash_type is 'str'

    Returns a dictionary mapping ((atom index in molecule 0, atom index in
    molecule 1), 'r<radius0>-r<radius1>') to the hashed identifier of that
    fragment pair. On an empty contact list or an unknown ifptype a message
    is printed and an empty dictionary is returned; on an unknown hash_type
    a message is printed and whatever was collected so far is returned.
    """
    # Defaults are created per call so no mutable object is shared
    # between invocations.
    if base_prop is None:
        base_prop = ['AtomicMass']
    if sa_dicts is None:
        sa_dicts = [{}, {}]
    if contacts is None:
        contacts = [[], []]
    if degrees is None:
        degrees = [1, 1]
    if parameters is None:
        parameters = [{
            'weighted': 0,
            'alpha': -1,
            'alpha_step': 0.1
        }, {
            'weighted': 0,
            'alpha': -1,
            'alpha_step': 0.1
        }]

    ecfp_dict = {}
    mols = [mols_info[0][1], mols_info[1][1]]

    # len() rather than truthiness: the contact lists may be array-like.
    if len(contacts[0]) == 0:
        print('Wrong contact list!')
        return ecfp_dict

    # Solid angles are only needed when a requested property uses them.
    sa_lists = [{}, {}]
    if any('SolidAngle' in prop for prop in base_prop):
        for i in (0, 1):
            if len(sa_dicts[i]) == 0:
                hull = concave_hull_3D(
                    points=mols_info[i][0],
                    weights=mols_info[i][2],
                    alpha=parameters[i]['alpha'],
                    alpha_step=parameters[i]['alpha_step'])
                hull.construct_conchull()
                sa_lists[i] = hull
            else:
                sa_lists[i] = sa_dicts[i]

    if ifptype == 'splif':
        dg_pairs = [(degrees[0], degrees[1])]
    elif ifptype == 'plec':
        dg_pairs = plec_pairing(plec_degrees=degrees)
    else:
        print('Wrong ifp type!')
        return ecfp_dict

    # Environment pairs that already produced an identifier, stored as
    # tuples of tuples so that membership tests are O(1).
    seen_envs = set()
    # Atom pairs that must not be expanded in this or any later round.
    dead_pairs = set()

    for dgs in dg_pairs:
        is_zero_round = dgs == (0, 0)
        round_candidates = []
        for a1, a2 in zip(contacts[0], contacts[1]):
            inds = (int(a1), int(a2))
            if inds in dead_pairs:
                continue

            atoms = (mols[0].GetAtomWithIdx(inds[0]),
                     mols[1].GetAtomWithIdx(inds[1]))
            involves_hydrogen = (heavy_atoms and
                                 (atoms[0].GetAtomicNum() == 1 or
                                  atoms[1].GetAtomicNum() == 1))
            has_isolated_atom = (atoms[0].GetDegree() == 0 or
                                 atoms[1].GetDegree() == 0)
            if involves_hydrogen or has_isolated_atom:
                dead_pairs.add(inds)
                continue

            nbhd_pairs = [
                _fragment_around_atom(mols[k], sa_lists[k], inds[k], dgs[k],
                                      heavy_atoms, base_prop, hash_type)
                for k in (0, 1)
            ]

            if is_zero_round:
                # Radius-0 pairs are always recorded; no duplicate filtering.
                idf = _hash_fragment_pair((nbhd_pairs[0][1], nbhd_pairs[1][1]),
                                          hash_type, idf_power)
                if idf is None:
                    return ecfp_dict
                ecfp_dict[(inds, 'r0-r0')] = idf
            else:
                round_candidates.append(nbhd_pairs)
                # NOTE(review): this check is applied to non-zero rounds
                # only; confirm against the original file layout.
                env_key = (tuple(nbhd_pairs[0][0]), tuple(nbhd_pairs[1][0]))
                if env_key in seen_envs:
                    dead_pairs.add(inds)

        if not is_zero_round:
            # Sorting makes the choice among duplicate environments
            # independent of the order of the contact list.
            round_candidates.sort()
            label = 'r' + str(dgs[0]) + '-r' + str(dgs[1])
            for candidate in round_candidates:
                env_key = (tuple(candidate[0][0]), tuple(candidate[1][0]))
                cand_inds = (candidate[0][2], candidate[1][2])
                if env_key in seen_envs:
                    # Same pair of bond environments was already emitted.
                    dead_pairs.add(cand_inds)
                    continue
                seen_envs.add(env_key)
                idf = _hash_fragment_pair((candidate[0][1], candidate[1][1]),
                                          hash_type, idf_power)
                if idf is None:
                    return ecfp_dict
                ecfp_dict[(cand_inds, label)] = idf

    return ecfp_dict