def __mergeSortOne(self, array: list, p: int, q: int, r: int) -> list:
    '''
    Merge the two adjacent sorted runs `array[p..q]` and `array[q+1..r]` in place.

    This is the single "merge two piles of cards" step of merge sort. Both
    runs must already be sorted in ascending order; they are not sorted here.
    Elements are moved, never converted, so ints stay ints and any mutually
    comparable values (including strings or infinities) can be merged.

    Args
    ===
    `array` : mutable sequence; it is modified in place

    `p` : index of the first element of the left run

    `q` : index of the last element of the left run

    `r` : index of the last element of the right run (inclusive)

    Returns
    ===
    `sortedArray` : the same `array` object, with `array[p..r]` merged

    Raises
    ===
    `ValueError` : if the indices do not satisfy `0 <= p <= q <= r`

    `IndexError` : if `r` is beyond the end of `array`
    '''
    if not 0 <= p <= q <= r:
        raise ValueError(
            "indices must satisfy 0 <= p <= q <= r, got p={!r}, q={!r}, r={!r}".format(p, q, r)
        )

    # Copy both runs first: the merge below overwrites array[p..r].
    left = [array[index] for index in range(p, q + 1)]
    right = [array[index] for index in range(q + 1, r + 1)]

    i = j = 0
    for k in range(p, r + 1):
        # Take from the left run on ties so the merge is stable. Once one run
        # is exhausted the explicit bounds checks drain the other one, so no
        # "infinity" sentinel card is needed.
        if j >= len(right) or (i < len(left) and left[i] <= right[j]):
            array[k] = left[i]
            i += 1
        else:
            array[k] = right[j]
            j += 1
    return array
def radixsort(self, A, d):
    '''
    Radix sort (least-significant digit first) for non-negative integers.

    Performs `d` stable bucket passes, one per decimal digit, so the running
    time is `Θ(d(n + 10))`. Whole elements are moved between buckets; digits
    are never sorted independently of the numbers they belong to.

    Args
    ===
    `A` : sequence of non-negative integers to sort; it is not modified

    `d` : number of decimal digits to sort on, where digit 1 is the lowest
    digit and digit `d` the highest. Elements with more than `d` digits are
    only ordered by their lowest `d` digits.

    Return
    ===
    `sortedarray` : a new integer array (as produced by `_arange`) holding the
    elements of `A` in ascending order

    Example
    ===
    ```python
    >>> list(Chapter8_3().radixsort([54,43,32,21,11], 2))
    >>> [11, 21, 32, 43, 54]
    ```
    '''
    ordered = list(A)
    for position in range(d):
        divisor = 10 ** position
        # Ten buckets, one per decimal digit; appending keeps each pass stable,
        # which is what makes LSD radix sort correct.
        buckets = [[] for _ in range(10)]
        for value in ordered:
            buckets[(value // divisor) % 10].append(value)
        ordered = [value for bucket in buckets for value in bucket]

    # Keep the historical return type: an integer array of the input length.
    C = _arange(len(ordered))
    for index, value in enumerate(ordered):
        C[index] = value
    return C
def __init__(self, receptor=None, ligand=None, forcefield=None, pH=7.0, addHs=True, mpi_comm=None):
    """Assemble a receptor-ligand complex when both partners are supplied.

    If either ``receptor`` or ``ligand`` is ``None`` the constructor returns
    without setting any attribute.

    Args:
        receptor: A receptor object exposing ``topology``, ``positions``,
            ``n_atoms`` and ``forcefield``, or a string that is passed to
            ``Receptor(...)`` to build one.
        ligand: A ligand object exposing ``topology``, ``positions`` and
            ``n_atoms``, or a string that is passed to ``Ligand(...)``.
        forcefield: Forwarded to ``Receptor``/``Ligand`` when they are built
            from a string; not used otherwise.
        pH: Forwarded like ``forcefield``.
        addHs: Forwarded like ``forcefield``.
        mpi_comm: Forwarded like ``forcefield``.
    """
    if receptor is None or ligand is None:
        return

    # isinstance (rather than an exact type comparison) so that str
    # subclasses are also treated as inputs to build from.
    if isinstance(receptor, str):
        receptor = Receptor(receptor, forcefield, pH, addHs, mpi_comm)
    if isinstance(ligand, str):
        ligand = Ligand(ligand, forcefield, pH, addHs, mpi_comm)

    # Start the model from the receptor and append the ligand to it.
    self.modeller = app.Modeller(receptor.topology, receptor.positions)
    self.modeller.add(ligand.topology, ligand.positions)
    self.topology = self.modeller.getTopology()
    # Original author's note: np.vstack of receptor.positions and ligand.positions.
    self.positions = self.modeller.getPositions()

    n_receptor_atoms = receptor.n_atoms
    n_complex_atoms = n_receptor_atoms + ligand.n_atoms

    # Index ranges assume receptor atoms come first, ligand atoms after them.
    self.receptor = receptor
    self.receptor_atom_indices = _arange(0, n_receptor_atoms)
    self.ligand = ligand
    self.ligand_atom_indices = _arange(n_receptor_atoms, n_complex_atoms)
    self.complex_atom_indices = _arange(n_complex_atoms)
    # The complex takes its force field from the receptor object, not from
    # the ``forcefield`` argument.
    self.forcefield = self.receptor.forcefield
def on_line_maximum(self, k, n, score=None):
    """Pick a candidate on-line: observe the first ``k``, then take the first better one.

    The best score among candidates ``0 .. k-1`` is recorded without
    selecting any of them. Candidates ``k .. n-1`` are then examined in
    order and the index of the first one whose score is strictly greater
    than that recorded best is returned.

    Args:
        k: Number of leading candidates that are only observed.
        n: Total number of candidates.
        score: Optional indexable sequence with at least ``n`` scores. When
            omitted, the scores ``0, 1, ..., n-1`` (``_arange(n)``) are used.

    Returns:
        The index of the selected candidate, or ``n`` when no candidate after
        the first ``k`` beats the best of the first ``k``.
    """
    if score is None:
        score = _arange(n)

    # Observation phase: remember the best score, select nobody.
    bestscore = -_math.inf
    for i in range(k):
        if score[i] > bestscore:
            bestscore = score[i]

    # Selection phase: commit to the first strictly better candidate.
    for i in range(k, n):
        if score[i] > bestscore:
            return i
    return n
def center(self, geometrical_center = 'heavy'):
    """Shift all positions so the chosen geometrical center moves to the origin.

    The center is computed by ``utils.geometrical_center`` over a subset of
    atoms and subtracted from every position; the result is stored back with
    ``self.set_positions``.

    Args:
        geometrical_center: Which atoms define the center:
            ``'heavy'`` uses ``self._heavy_atoms_indices``,
            ``'CA'`` uses ``self._ca_atoms_indices``,
            ``'All'`` uses every atom index ``0 .. self.n_atoms - 1``.

    Raises:
        ValueError: If ``geometrical_center`` is not one of the values above.
            Positions are left untouched in that case.
    """
    if geometrical_center == 'heavy':
        atom_indices = self._heavy_atoms_indices
    elif geometrical_center == 'CA':
        atom_indices = self._ca_atoms_indices
    elif geometrical_center == 'All':
        atom_indices = _arange(self.n_atoms)
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError(
            "geometrical_center must be 'heavy', 'CA' or 'All', got %r" % (geometrical_center,)
        )

    positions = self.get_positions()
    center_position = utils.geometrical_center(positions, atom_indices)
    self.set_positions(positions - center_position)
def __init__(self, radii, mols, reference_substructure_keys=None):
    """Store the fingerprint settings and reset all computed results.

    Args:
        radii: Iterable of substructure radii to consider; its maximum is
            kept as ``self.max_radius``.
        mols: The molecules to process. A list is stored as-is; any other
            iterable is copied into a new list.
        reference_substructure_keys: Optional collection of substructure keys
            that defines the columns of the unhashed fingerprints. When
            omitted, a fresh empty dict is used for this instance.
    """
    self.radii = radii
    self.max_radius = max(radii)
    # Normalize to a list built from the argument itself rather than from
    # any module-level state.
    if not isinstance(mols, list):
        mols = list(mols)
    self.mols = mols
    # A per-instance default avoids sharing one mutable dict across objects.
    if reference_substructure_keys is None:
        reference_substructure_keys = {}
    self.reference_substructure_keys = reference_substructure_keys
    self.substructure_dictionary = {}
    self.mols_reference_for_unhashed = None
    self.columns_unhashed = None
    self.substructure_ids = None
    # output
    self.fps_hashed_binary_quick = None
    self.fps_hashed_binary = None
    self.fps_hashed_counts = None
    self.fps_unhashed_binary = None
    self.fps_unhashed_counts = None
    self.substructures_smiles = {}
def __init__(self, radii, mols, reference_substructure_keys=None):
    """Record the fingerprint configuration and clear every result slot.

    Args:
        radii: Iterable of substructure radii to consider; ``max(radii)`` is
            stored as ``self.max_radius``.
        mols: The molecules to process. A list is kept as-is; any other
            iterable is materialized into a new list.
        reference_substructure_keys: Optional collection of substructure keys
            defining the columns of the unhashed fingerprints. Defaults to a
            new empty dict owned by this instance.
    """
    self.radii = radii
    self.max_radius = max(radii)
    # Build the list from the ``mols`` argument itself, not from a global.
    if not isinstance(mols, list):
        mols = list(mols)
    self.mols = mols
    # ``None`` sentinel instead of a mutable default shared by all instances.
    if reference_substructure_keys is None:
        reference_substructure_keys = {}
    self.reference_substructure_keys = reference_substructure_keys
    self.substructure_dictionary = {}
    self.mols_reference_for_unhashed = None
    self.columns_unhashed = None
    self.substructure_ids = None
    # output
    self.fps_hashed_binary_quick = None
    self.fps_hashed_binary = None
    self.fps_hashed_counts = None
    self.fps_unhashed_binary = None
    self.fps_unhashed_counts = None
    self.substructures_smiles = {}
def calculate_unhashed_fps(self, draw_substructures=False, image_directory='./images_substructures'):
    """Compute unhashed Morgan fingerprints (binary and counts) for ``self.mols``.

    For every molecule the Morgan ``bitInfo`` dictionary is collected and the
    substructures whose radius is in ``self.radii`` are kept. The columns of
    the result are the sorted reference keys if
    ``self.reference_substructure_keys`` is non-empty, otherwise the sorted
    keys of every substructure known to ``self.substructure_dictionary``.

    Side effects on ``self``: updates ``substructures_smiles`` and
    ``substructure_dictionary``; sets ``substructure_ids`` and
    ``columns_unhashed``; sets ``fps_unhashed_binary`` and
    ``fps_unhashed_counts`` (both of shape ``(len(self.mols), n_columns)``)
    unless no substructure of the input molecules is among the columns, in
    which case a message is printed and the two matrices are left untouched.

    Args:
        draw_substructures: When true, write one PDF per newly seen
            substructure into ``image_directory``.
        image_directory: Target directory for the images; created if missing.

    Returns:
        None.
    """
    # Parallel flat lists: entry i of each one describes the same
    # (molecule, substructure) occurrence.
    flat_idxs = []
    flat_ids = []
    flat_counts = []

    for mol_index, mol in enumerate(self.mols):
        info = {}
        # Called only for its side effect of filling ``info`` (bitInfo).
        _GetMorganFingerprint(mol, radius=self.max_radius, bitInfo=info)

        # Per RDKit's bitInfo convention each value is a sequence of
        # (atom index, radius) pairs; v[0][1] is read as the radius. Selecting
        # once keeps ids and counts aligned regardless of dict ordering.
        selected = [(k, v) for k, v in info.items() if v[0][1] in self.radii]

        substructure_dictionary = {k: [mol_index] for k, _ in selected}
        flat_ids.extend(k for k, _ in selected)
        flat_idxs.extend([mol_index] * len(selected))
        flat_counts.extend(len(v) for _, v in selected)

        # get the smiles for the substructures
        amap = {}
        substructures_smiles = {}
        for k, v in selected:
            env = _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0])
            substructures_smiles[k] = [_MolToSmiles(_PathToSubmol(mol, env, atomMap=amap))]
        self.substructures_smiles.update(substructures_smiles)

        # generate the images for the substructures if required..
        if draw_substructures:
            if not _exists(image_directory):
                _makedirs(image_directory)
            for k, v in selected:
                # Only substructures not yet registered are drawn.
                if k not in self.substructure_dictionary:
                    image_name = "%s/Molecule_%d_substr_%d.pdf" % (image_directory, mol_index, k)
                    env = _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0])
                    amap = {}
                    # Called to fill ``amap`` with the atoms to highlight.
                    _PathToSubmol(mol, env, atomMap=amap)
                    _MolToFile(mol, image_name, size=(300, 300), wedgeBonds=True,
                               kekulize=True, highlightAtoms=list(amap))

        self.substructure_dictionary = self._combine_dicts(
            substructure_dictionary, self.substructure_dictionary)

    self.substructure_ids = _array(flat_ids)

    if len(self.reference_substructure_keys) == 0:
        print(
            "No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints."
        )
        column_keys = self.substructure_dictionary.keys()
    else:
        column_keys = self.reference_substructure_keys
    columns = _sort(_array(list(set(column_keys))))
    self.columns_unhashed = columns
    dimensionality_unhashed = len(columns)

    fps_unhashed_binary = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)
    fps_unhashed_counts = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)

    # Drop the occurrences whose substructure is not among the columns and
    # translate the remaining ids to column positions in a single pass.
    column_of = {key: position for position, key in enumerate(columns.tolist())}
    rows = []
    cols = []
    kept_counts = []
    for row, substr_id, count in zip(flat_idxs, flat_ids, flat_counts):
        col = column_of.get(substr_id)
        if col is not None:
            rows.append(row)
            cols.append(col)
            kept_counts.append(count)

    if not cols:
        print(
            "There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules."
        )
        return

    fps_unhashed_binary[rows, cols] = 1
    fps_unhashed_counts[rows, cols] = kept_counts
    self.fps_unhashed_binary = fps_unhashed_binary
    self.fps_unhashed_counts = fps_unhashed_counts
def calculate_unhashed_fps(self,draw_substructures=False,image_directory='./images_substructures'):
    """Compute unhashed Morgan fingerprints (binary and counts) for ``self.mols``.

    For every molecule the Morgan ``bitInfo`` dictionary is collected and the
    substructures whose radius is in ``self.radii`` are kept. The columns of
    the result are the sorted, de-duplicated reference keys if
    ``self.reference_substructure_keys`` is non-empty, otherwise the sorted
    keys of ``self.substructure_dictionary``.

    Side effects on ``self``: updates ``substructures_smiles``; extends
    ``substructure_dictionary`` (substructure id -> list of molecule indices);
    sets ``substructure_ids`` and ``columns_unhashed``; sets
    ``fps_unhashed_binary`` and ``fps_unhashed_counts`` (both of shape
    ``(len(self.mols), n_columns)``) unless no substructure of the input
    molecules is among the columns, in which case a message is printed and
    the two matrices are left untouched.

    Args:
        draw_substructures: When true, write PDF images of substructures that
            were not registered before this call into ``image_directory``.
        image_directory: Target directory for the images; created if missing.

    Returns:
        None.
    """
    # Parallel flat lists: entry i of each one describes the same
    # (molecule, substructure) occurrence.
    flat_idxs = []
    flat_ids = []
    flat_counts = []

    for mol_index, mol in enumerate(self.mols):
        info = {}
        # Called only for its side effect of filling ``info`` (bitInfo).
        _GetMorganFingerprint(mol, radius=self.max_radius, bitInfo=info)

        # Per RDKit's bitInfo convention each value is a sequence of
        # (atom index, radius) pairs; v[0][1] is read as the radius. Selecting
        # once keeps ids and counts aligned regardless of dict ordering.
        selected = [(k, v) for k, v in info.items() if v[0][1] in self.radii]

        flat_ids.extend(k for k, _ in selected)
        flat_idxs.extend([mol_index] * len(selected))
        flat_counts.extend(len(v) for _, v in selected)

        # get the smiles for the substructures
        amap = {}
        substructures_smiles = {}
        for k, v in selected:
            env = _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0])
            substructures_smiles[k] = [_MolToSmiles(_PathToSubmol(mol, env, atomMap=amap))]
        self.substructures_smiles.update(substructures_smiles)

        # generate the images for the substructures if required..
        if draw_substructures:
            if not _exists(image_directory):
                _makedirs(image_directory)
            for k, v in selected:
                # NOTE(review): the dictionary is only extended after this
                # loop, so this test sees substructures registered before the
                # current call - confirm that is the intended behaviour.
                if k not in self.substructure_dictionary:
                    image_name = "%s/Molecule_%d_substr_%d.pdf" % (image_directory, mol_index, k)
                    env = _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0])
                    amap = {}
                    # Called to fill ``amap`` with the atoms to highlight.
                    _PathToSubmol(mol, env, atomMap=amap)
                    _MolToFile(mol, image_name, size=(300, 300), wedgeBonds=True,
                               kekulize=True, highlightAtoms=list(amap))

    # Register, for every substructure, the molecules it occurs in (each
    # molecule index at most once per substructure).
    for substr_id, mol_index in zip(flat_ids, flat_idxs):
        members = self.substructure_dictionary.setdefault(substr_id, [])
        if mol_index not in members:
            members.append(mol_index)

    self.substructure_ids = _array(flat_ids)

    if len(self.reference_substructure_keys) == 0:
        print("No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints.")
        column_keys = self.substructure_dictionary.keys()
    else:
        column_keys = self.reference_substructure_keys
    # De-duplicate so that every key maps to exactly one column.
    columns = _sort(_array(list(set(column_keys))))
    self.columns_unhashed = columns
    dimensionality_unhashed = len(columns)

    fps_unhashed_binary = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)
    fps_unhashed_counts = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)

    # Filter and map in one pass: occurrences whose substructure is not among
    # the columns are dropped, so rows/cols are always flat integer lists of
    # equal length (never ragged, even with a partial overlap).
    column_of = {key: position for position, key in enumerate(columns.tolist())}
    rows = []
    cols = []
    kept_counts = []
    for row, substr_id, count in zip(flat_idxs, flat_ids, flat_counts):
        col = column_of.get(substr_id)
        if col is not None:
            rows.append(row)
            cols.append(col)
            kept_counts.append(count)

    if not cols:
        print("There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules.")
        return

    fps_unhashed_binary[rows, cols] = 1
    fps_unhashed_counts[rows, cols] = kept_counts
    self.fps_unhashed_binary = fps_unhashed_binary
    self.fps_unhashed_counts = fps_unhashed_counts
def get_residue_indices_in_helix(self, helix=None):
    """Return the consecutive residue indices covered by one helix.

    ``self.helices[helix]`` is read as a pair whose item 0 is the first
    residue index and item 1 the last one; the last index is included.

    Args:
        helix: Key used to look the helix up in ``self.helices``.

    Returns:
        The result of ``_arange(first, last + 1)``.
    """
    first_residue = self.helices[helix][0]
    # +1 because the upper bound passed to _arange is exclusive.
    end_exclusive = self.helices[helix][1] + 1
    return _arange(first_residue, end_exclusive)
def arange(i):
    """Forward ``i`` to ``_arange`` and hand back whatever it returns."""
    values = _arange(i)
    return values