Ejemplo n.º 1
0
    def __mergeSortOne(self, array : list, p : int ,q : int, r : int) -> list:
        '''
        一步合并两堆牌排序算法过程

        Args
        ===
        `array` : a array like

        Returns
        ===
        `sortedArray` : 排序好的数组

        Raises
        ===
        `None`
        '''
        # python中变量名和对象是分离的
        # 此时A是array的一个引用
        A = array
        # 求数组的长度 然后分成两堆([p..q],[q+1..r]) ([0..q],[q+1..n-1])
        n = r + 1

        # 检测输入参数是否合理
        if q < 0 or q > n - 1:
            raise Exception("arg 'q' must not be in (0,len(array) range)")
        # n1 + n2 = n
        # 求两堆牌的长度
        n1 = q - p + 1
        n2 = r - q
        # 构造两堆牌(包含“哨兵牌”)
        L = _arange(n1 + 1, dtype=float)
        R = _arange(n2 + 1, dtype=float)
        # 将A分堆
        for i in range(n1):
            L[i] = A[p + i]
        for j in range(n2):
            R[j] = A[q + j + 1]
        # 加入无穷大“哨兵牌”, 对不均匀分堆的完美解决
        L[n1] = _math.inf
        R[n2] = _math.inf
        # 因为合并排序的前提是两堆牌是已经排序好的,所以这里排序一下
        # chapter2 = Chapter2()
        # L = chapter2.selectSortAscending(L)
        # R = chapter2.selectSortAscending(R)
        # 一直比较两堆牌的顶部大小大小放入新的堆中
        i, j = 0, 0
        for k in range(p, n):
            if L[i] <= R[j]:
                A[k] = L[i]
                i += 1
            else:
                A[k] = R[j]
                j += 1
        return A
Ejemplo n.º 2
0
    def radixsort(self, A, d):
        '''
        基数排序 平均时间复杂度为`Θ(nlgn)`

        Args
        ===
        `A` : 待排序的数组

        `d` : 数组A中每个元素都有d位数字/长度,其中第1位是最低位,第d位是最高位

        Return
        ===
        `sortedarray` : 排序好的数组 

        Example
        ===
        ```python
        >>> Chapter8_3().radixsort([54,43,32,21,11], 2)
        >>> [11, 21, 32, 43, 54]
        ```
        '''
        length = len(A)
        B = []
        for i in range(d):
            B.append(self.getarraystr_subarray(A, i + 1))
        for k in range(d):
            B[k] = self.countingsort(B[k], max(B[k]) + 1)
        C = _arange(length)
        for j in range(length):
            for i in range(d):
                C[j] += B[i][j] * 10**i
            C[j] = C[j] - j
        return C
Ejemplo n.º 3
0
    def __init__(self, receptor=None, ligand=None, forcefield=None, pH=7.0, addHs=True,
                 mpi_comm=None):

        if (receptor is not None) and (ligand is not None):
            if type(receptor) == str:
                receptor = Receptor(receptor,forcefield,pH,addHs,mpi_comm)
            if type(ligand) == str:
                ligand = Ligand(ligand,forcefield,pH,addHs,mpi_comm)

        self.modeller   = app.Modeller(receptor.topology, receptor.positions)
        self.modeller.add(ligand.topology, ligand.positions)
        self.topology   = self.modeller.getTopology()
        self.positions = self.modeller.getPositions() #np.vstack receptor.positions y ligand.positions
        self.receptor   = receptor
        self.receptor_atom_indices = _arange(0,receptor.n_atoms)
        self.ligand     = ligand
        self.ligand_atom_indices = _arange(receptor.n_atoms,receptor.n_atoms+ligand.n_atoms)
        self.complex_atom_indices = _arange(receptor.n_atoms+ligand.n_atoms)
        self.forcefield = self.receptor.forcefield
Ejemplo n.º 4
0
 def on_line_maximum(self, k , n):
     score = _arange(n)
     bestscore = -_math.inf
     for i in range(k):
         if score[i] > bestscore:
             bestscore = score[i]
     for i in range(k, n):
         if score[i] > bestscore:
             return i
     return n
Ejemplo n.º 5
0
    def center(self, geometrical_center = 'heavy'):

        if geometrical_center == 'heavy':
            tmp_list = self._heavy_atoms_indices
        elif geometrical_center == 'CA':
            tmp_list = self._ca_atoms_indices
        elif geometrical_center == 'All':
            tmp_list = _arange(self.n_atoms)

        tmp_positions = self.get_positions()
        geometrical_center_positions = utils.geometrical_center(tmp_positions,tmp_list)
        tmp_positions = tmp_positions - geometrical_center_positions
        self.set_positions(tmp_positions)
Ejemplo n.º 6
0
 def __init__(self,radii,mols,reference_substructure_keys={}):
     self.radii = radii
     self.max_radius = max(radii)
     if type(mols) != list: mols =  mols = [ext.mols[i] for i in _arange(0,len(mols))] 
     self.mols = mols
     self.reference_substructure_keys = reference_substructure_keys
     self.substructure_dictionary = {}
     self.mols_reference_for_unhashed = None
     self.columns_unhashed = None 
     self.substructure_ids = None
     # output
     self.fps_hashed_binary_quick = None
     self.fps_hashed_binary = None
     self.fps_hashed_counts = None
     self.fps_unhashed_binary = None
     self.fps_unhashed_counts = None
     self.substructures_smiles = {}
Ejemplo n.º 7
0
 def __init__(self, radii, mols, reference_substructure_keys={}):
     self.radii = radii
     self.max_radius = max(radii)
     if type(mols) != list:
         mols = [ext.mols[i] for i in _arange(0, len(mols))]
     self.mols = mols
     self.reference_substructure_keys = reference_substructure_keys
     self.substructure_dictionary = {}
     self.mols_reference_for_unhashed = None
     self.columns_unhashed = None
     self.substructure_ids = None
     # output
     self.fps_hashed_binary_quick = None
     self.fps_hashed_binary = None
     self.fps_hashed_counts = None
     self.fps_unhashed_binary = None
     self.fps_unhashed_counts = None
     self.substructures_smiles = {}
Ejemplo n.º 8
0
    def calculate_unhashed_fps(self,
                               draw_substructures=False,
                               image_directory='./images_substructures'):
        # get the dictionary for the substructures
        idxs = []
        substr_ids = []
        counts = []
        for mol_index, mol in enumerate(self.mols):
            info = {}
            fp = _GetMorganFingerprint(mol,
                                       radius=self.max_radius,
                                       bitInfo=info)
            substructure_dictionary = {
                k: [mol_index]
                for k, v in info.iteritems() if v[0][1] in self.radii
            }
            substr_ids.append(substructure_dictionary.keys())
            idxs.append([mol_index] * len(substructure_dictionary.keys()))
            counts.append([
                len(info.values()[x]) for x in _arange(0, len(info))
                if info.values()[x][0][1] in self.radii
            ])

            # get the smiles for the substructures
            amap = {}
            substructures_smiles = {
                k: [
                    _MolToSmiles(
                        _PathToSubmol(mol,
                                      _FindAtomEnvironmentOfRadiusN(
                                          mol, v[0][1], v[0][0]),
                                      atomMap=amap))
                ]
                for k, v in info.iteritems() if v[0][1] in self.radii
            }
            self.substructures_smiles.update(substructures_smiles)

            # generate the images for the substructures if required..
            if draw_substructures:
                if not _exists(image_directory):
                    _makedirs(image_directory)
                for k, v in info.iteritems():
                    if k not in self.substructure_dictionary.keys(
                    ) and v[0][1] in self.radii:
                        image_name = "%s/Molecule_%d_substr_%d.pdf" % (
                            image_directory, mol_index, k)
                        env = _FindAtomEnvironmentOfRadiusN(
                            mol, v[0][1], v[0][0])
                        amap = {}
                        submol = _PathToSubmol(mol, env, atomMap=amap)
                        _MolToFile(mol,
                                   image_name,
                                   size=(300, 300),
                                   wedgeBonds=True,
                                   kekulize=True,
                                   highlightAtoms=amap.keys())

            self.substructure_dictionary = self._combine_dicts(
                substructure_dictionary, self.substructure_dictionary)

        idxs = _array([val for sublist in idxs for val in sublist])
        counts = _array([val for sublist in counts for val in sublist])
        substr_ids_flattened = [
            val for sublist in substr_ids for val in sublist
        ]
        substr_ids = _array(substr_ids_flattened)
        self.substructure_ids = substr_ids
        if len(self.reference_substructure_keys) == 0:
            print(
                "No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints."
            )
            columns = _array(list(set(self.substructure_dictionary.keys())))
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)
        else:
            columns = _array(list(set(self.reference_substructure_keys)))
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)

        fps_unhashed_binary = _zeros((len(self.mols), dimensionality_unhashed),
                                     dtype=int)
        fps_unhashed_counts = _zeros((len(self.mols), dimensionality_unhashed),
                                     dtype=int)

        # removing the indices corresponding to the substructures in the test molecules not present in the references set of substructures..
        idxs = _array([
            idxs[x] for x in _arange(0, len(substr_ids))
            if substr_ids[x] in self.columns_unhashed
        ])
        counts = _array([
            counts[x] for x in _arange(0, len(substr_ids))
            if substr_ids[x] in self.columns_unhashed
        ])
        substr_ids = _array([
            substr_ids[x] for x in _arange(0, len(substr_ids))
            if substr_ids[x] in self.columns_unhashed
        ])
        mapping = _array([(substr_ids[x] == columns).nonzero()
                          for x in _arange(0, len(substr_ids))])
        mapping = mapping.flatten()
        if len(mapping) == 0:
            print(
                "There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules."
            )
            return

        fps_unhashed_binary[idxs, mapping] = _ones(len(counts))
        fps_unhashed_counts[idxs, mapping] = counts
        self.fps_unhashed_binary = fps_unhashed_binary
        self.fps_unhashed_counts = fps_unhashed_counts
Ejemplo n.º 9
0
    def calculate_unhashed_fps(self,draw_substructures=False,image_directory='./images_substructures'): 
        # get the dictionary for the substructures
        idxs = []
        substr_ids = []
        counts=[]
        substructure_dictionaries = []    
        for mol_index,mol in enumerate(self.mols):
            info={}
            fp = _GetMorganFingerprint(mol,radius=self.max_radius,bitInfo=info)
            substructure_dictionary = {k:mol_index for k,v in info.iteritems() if v[0][1] in self.radii}
            substructure_dictionaries.append({k:mol_index for k,v in info.iteritems() if v[0][1] in self.radii})
            substr_ids.append(substructure_dictionary.keys())
            idxs.append([mol_index]*len(substructure_dictionary.keys()))
            counts.append([ len(info.values()[x]) for x in _arange(0,len(info)) if info.values()[x][0][1] in self.radii])
            
            # get the smiles for the substructures
            amap = {}
            substructures_smiles = {k:[_MolToSmiles(_PathToSubmol(mol,_FindAtomEnvironmentOfRadiusN(mol,v[0][1],v[0][0]),atomMap=amap))] for k,v in info.iteritems() if v[0][1] in self.radii}
            self.substructures_smiles.update(substructures_smiles)
            
            # generate the images for the substructures if required..
            if draw_substructures:
                if not _exists(image_directory):
                    _makedirs(image_directory)
                for k,v in info.iteritems():
                    if k not in self.substructure_dictionary.keys() and v[0][1] in self.radii:
                        image_name="%s/Molecule_%d_substr_%d.pdf"%(image_directory,mol_index,k)
                        env=_FindAtomEnvironmentOfRadiusN(mol,v[0][1],v[0][0])
                        amap={}
                        submol=_PathToSubmol(mol,env,atomMap=amap)
                        _MolToFile(mol,image_name,size=(300,300),wedgeBonds=True,kekulize=True,highlightAtoms=amap.keys())
            
        #self.substructure_dictionary = self._combine_dicts(substructure_dictionary,self.substructure_dictionary)
        for d in substructure_dictionaries:
             for k, v in d.iteritems():
               l=self.substructure_dictionary.setdefault(k,[])
               if v not in l:
                 l.append(v)
            
        idxs = _array([val for sublist in idxs for val in sublist])
        counts = _array([val for sublist in counts for val in sublist])
        substr_ids_flattened = [val for sublist in substr_ids for val in sublist]
        substr_ids = _array(substr_ids_flattened)
        self.substructure_ids = substr_ids
        if len(self.reference_substructure_keys)==0:
            print "No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints."
            columns = _array(list(set(self.substructure_dictionary.keys())))
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)
        else:
            columns = _array(self.reference_substructure_keys)
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)
        
        fps_unhashed_binary = _zeros((len(self.mols),dimensionality_unhashed), dtype=int)
        fps_unhashed_counts = _zeros((len(self.mols),dimensionality_unhashed), dtype=int)

            
        mapping = _array([(substr_ids[x]==columns).nonzero() for x in _arange(0,len(substr_ids))])
        mapping = mapping.flatten()
        idxs = _array([idxs[x] for x in _arange(0,len(mapping)) if mapping[x].size != 0])
        counts = _array([counts[x] for x in _arange(0,len(mapping)) if mapping[x].size != 0])
        mapping = _array([mapping[x] for x in _arange(0,len(mapping)) if mapping[x].size != 0])
        if len(mapping) == 0:
            print "There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules."
            return
        
        fps_unhashed_binary[idxs,mapping] = _ones(len(mapping))
        fps_unhashed_counts[idxs,mapping] = counts
        self.fps_unhashed_binary = fps_unhashed_binary
        self.fps_unhashed_counts = fps_unhashed_counts
Ejemplo n.º 10
0
    def get_residue_indices_in_helix(self, helix=None):

        return _arange(self.helices[helix][0],self.helices[helix][1]+1)
Ejemplo n.º 11
0
def arange(i):
    return _arange(i)