Exemple #1
0
def read_gufm_all(filename=data_file):
    '''
    Reads the time knots and the raw coefficient table from a gufm-style text file.

    The first line of the file is skipped.  The second line holds ``l_max``, ``nspl`` and
    the first time knots.  Every later line holds whitespace-separated numbers: a line is
    stored in the knot array while it still fits completely into the remaining knot slots,
    and in the coefficient array otherwise.  Lines may hold any number of values (the
    number of values per line is no longer assumed to be four) and blank lines are skipped.

    Parameters
    ----------
    filename
        path of the data file to read

    Returns
    -------
    gt_out
        coefficient values reshaped to (n, nspl) in column-major ('F') order,
        where n = l_max*(l_max+2)
    tknts
        array of nspl+4 time knots
    l_max
        first integer of the header line
    nspl
        second integer of the header line
    '''
    with open(filename, 'rb') as f:
        f.readline()  # the first line carries no numeric data used here
        header = f.readline().split()

        l_max = int(header[0])
        nspl = int(header[1])
        n = l_max * (l_max + 2)

        gt = _zeros(n * nspl)
        tknts = _zeros(nspl + 4)

        # The remainder of the header line already contains the first knots.
        first_knots = [float(x) for x in header[2:]]
        tknts[:len(first_knots)] = first_knots
        ti = len(first_knots)
        gi = 0
        for line in f:
            values = [float(x) for x in line.split()]
            count = len(values)
            if count == 0:
                continue
            if ti + count <= len(tknts):
                # still filling the knot array
                tknts[ti:ti + count] = values
                ti += count
            else:
                # knots are complete: everything else is coefficient data
                gt[gi:gi + count] = values
                gi += count
    gt_out = gt.reshape(n, nspl, order='F')
    return gt_out, tknts, l_max, nspl
Exemple #2
0
def read_gufm_all(filename=data_file):
    '''
    Reads the time knots and the raw coefficient table from a gufm-style text file.

    The first line of the file is skipped.  The second line holds ``l_max``, ``nspl`` and
    the first time knots.  Every later line holds whitespace-separated numbers: a line is
    stored in the knot array while it still fits completely into the remaining knot slots,
    and in the coefficient array otherwise.  Lines may hold any number of values (the
    number of values per line is no longer assumed to be four) and blank lines are skipped.

    Parameters
    ----------
    filename
        path of the data file to read

    Returns
    -------
    gt_out
        coefficient values reshaped to (n, nspl) in column-major ('F') order,
        where n = l_max * (l_max + 2)
    tknts
        array of nspl + 4 time knots
    l_max
        first integer of the header line
    nspl
        second integer of the header line
    '''
    with open(filename, 'rb') as f:
        f.readline()  # the first line carries no numeric data used here
        header = f.readline().split()

        l_max = int(header[0])
        nspl = int(header[1])
        n = l_max * (l_max + 2)

        gt = _zeros(n * nspl)
        tknts = _zeros(nspl + 4)

        # The remainder of the header line already contains the first knots.
        header_knots = [float(x) for x in header[2:]]
        tknts[:len(header_knots)] = header_knots
        ti = len(header_knots)
        gi = 0
        for line in f:
            numbers = [float(x) for x in line.split()]
            if not numbers:
                continue
            width = len(numbers)
            if ti + width <= len(tknts):
                # still filling the knot array
                tknts[ti:ti + width] = numbers
                ti += width
            else:
                # knots are complete: everything else is coefficient data
                gt[gi:gi + width] = numbers
                gi += width
    gt_out = gt.reshape(n, nspl, order='F')
    return gt_out, tknts, l_max, nspl
Exemple #3
0
 def calculate_hashed_fps_counts(self,nBits):
     """Build folded (hashed) Morgan fingerprints in binary and count form.

     For every molecule in ``self.mols`` the fingerprint is computed with
     ``radius=self.max_radius`` and the bit information is collected in a
     dict.  Each entry whose first recorded radius (``val[0][1]``) is in
     ``self.radii`` is folded onto column ``key % nBits``.

     Parameters
     ----------
     nBits
         number of columns of the folded fingerprints

     Results are stored on the instance, nothing is returned:
     ``self.fps_hashed_binary`` holds 1 where at least one selected entry
     folded onto the column, ``self.fps_hashed_counts`` holds the summed
     ``len(val)`` of all selected entries folded onto the column.
     Both arrays have shape (len(self.mols), nBits) and dtype int.
     """
     n_mols = len(self.mols)
     fps_hashed_binary = _zeros((n_mols, nBits), dtype=int)
     fps_hashed_counts = _zeros((n_mols, nBits), dtype=int)
     for mol_index, mol in enumerate(self.mols):
         info = {}
         # Called only for its side effect of filling `info`; the returned
         # fingerprint object itself is not needed here.
         _GetMorganFingerprint(mol, radius=self.max_radius, bitInfo=info)
         # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
         for key, val in info.items():
             if val[0][1] in self.radii:  # check if the radius is in the selection
                 column = key % nBits
                 fps_hashed_binary[mol_index, column] = 1
                 fps_hashed_counts[mol_index, column] += len(val)
     self.fps_hashed_binary = fps_hashed_binary
     self.fps_hashed_counts = fps_hashed_counts
Exemple #4
0
def calculate_gt_raw(gt, spl, nleft, l_max=14, jorder=4):
    '''
    Combines the raw coefficient table with the B-spline weights to obtain the Gauss
    coefficients (raw ordering) for one time, using the values produced by interval()
    and bspline().

    Parameters
    ----------
    gt:
        raw data from gufm1 (n x nspl numpy array)
    spl:
        B-spline basis (jorder numpy array)
    nleft:
        coordinate of the timeknot to the left of desired time
    l_max:
        spherical harmonic degree included in model (14)
    jorder:
        order of B-splines (4)
    Returns
    -------
        Gauss Coefficients for time in raw ordering (array of length l_max*(l_max+2)).

    Notes
    -----
    The column offset into gt is ``nleft - 4``; the literal 4 is used whatever the value
    of jorder is.
    '''
    n_coeffs = l_max * (l_max + 2)
    first_col = nleft - 4
    g_raw = _zeros(n_coeffs)
    for row in range(n_coeffs):
        for j in range(jorder):
            # weighted sum over the jorder columns starting at first_col
            g_raw[row] += spl[j] * gt[row, j + first_col]
    return g_raw
Exemple #5
0
 def calculate_hashed_fps(self, nBits):
     """Build folded (hashed) Morgan fingerprints in binary and count form.

     For every molecule in ``self.mols`` the fingerprint is computed with
     ``radius=self.max_radius`` and the bit information is collected in a
     dict.  Each entry whose first recorded radius (``val[0][1]``) is in
     ``self.radii`` is folded onto column ``key % nBits``.

     Parameters
     ----------
     nBits
         number of columns of the folded fingerprints

     Results are stored on the instance, nothing is returned:
     ``self.fps_hashed_binary`` holds 1 where at least one selected entry
     folded onto the column, ``self.fps_hashed_counts`` holds the summed
     ``len(val)`` of all selected entries folded onto the column.
     Both arrays have shape (len(self.mols), nBits) and dtype int.
     """
     shape = (len(self.mols), nBits)
     fps_hashed_binary = _zeros(shape, dtype=int)
     fps_hashed_counts = _zeros(shape, dtype=int)
     for mol_index, mol in enumerate(self.mols):
         info = {}
         # Called only for its side effect of filling `info`; the returned
         # fingerprint object itself is not needed here.
         _GetMorganFingerprint(mol,
                               radius=self.max_radius,
                               bitInfo=info)
         # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
         for key, val in info.items():
             if val[0][1] in self.radii:  # check if the radius is in the selection
                 column = key % nBits
                 fps_hashed_binary[mol_index, column] = 1
                 fps_hashed_counts[mol_index, column] += len(val)
     self.fps_hashed_binary = fps_hashed_binary
     self.fps_hashed_counts = fps_hashed_counts
Exemple #6
0
def calculate_gt_raw(gt, spl, nleft, l_max=14, jorder=4):
    '''
    Forms the Gauss coefficients (raw ordering) for one time as the spline-weighted sum
    of columns of the raw coefficient table, using the values produced by interval()
    and bspline().

    Parameters
    ----------
    gt:
        raw data from gufm1 (n x nspl numpy array)
    spl:
        B-spline basis (jorder numpy array)
    nleft:
        coordinate of the timeknot to the left of desired time
    l_max:
        spherical harmonic degree included in model (14)
    jorder:
        order of B-splines (4)
    Returns
    -------
        Gauss Coefficients for time in raw ordering (array of length l_max*(l_max+2)).

    Notes
    -----
    Column ``j + nleft - 4`` of gt is paired with ``spl[j]``; the literal 4 does not
    follow jorder.
    '''
    total = l_max*(l_max+2)
    shift = nleft-4
    g_raw = _zeros(total)
    for coeff in range(total):
        for j in range(jorder):
            column = j+shift
            g_raw[coeff] += spl[j]*gt[coeff,column]
    return g_raw
Exemple #7
0
    def generate(self, xoffset, zoffset, xsize, zsize, xscale, zscale):
        """ Fill per-cell vertex data for an xsize-by-zsize grid of quads.

        For every grid cell (ix, iz) four corner points are built at
        y = 0, offset by (xoffset, zoffset), and written as six entries
        (two triangles) into ``positions``.

        NOTE(review): the visible body looks truncated -- ``index`` is
        never advanced, ``elevs``, ``normals``, ``colors``, ``xscale`` and
        ``zscale`` are not used, and nothing is returned.  Confirm against
        the full source before relying on this description.
        """

        # One entry per grid cell; six vertices (two triangles) per cell
        size = xsize * zsize
        elevs = _zeros(xsize * zsize)
        positions = empty(size * 6, dtype=Vector3f)
        normals = _zeros(size * 6, dtype=Vector3f._UNIT)
        colors = empty(size, dtype=Vector3f)

        index = 0
        # xrange: this snippet targets Python 2
        for iz in xrange(zsize):
            for ix in xrange(xsize):

                # Determine the positions of the quad
                p1 = Vector3f(xoffset + ix, 0, zoffset + iz)
                p2 = Vector3f(xoffset + ix, 0, zoffset + iz + 1)
                p3 = Vector3f(xoffset + ix + 1, 0, zoffset + iz + 1)
                p4 = Vector3f(xoffset + ix + 1, 0, zoffset + iz)

                # Two triangles sharing the p2-p4 diagonal: (p1, p2, p4) and (p4, p2, p3)
                positions[index:index + 6] = [p1, p2, p4, p4, p2, p3]
Exemple #8
0
    def _read_data(self, data_file=None):
        """ Parse the time knots and the raw coefficient table from a data file.

        The first line is skipped.  The second line starts with ``l_max`` and
        ``nspl``; if its third value is below 1000 it is taken as the B-spline
        order and the knots start at the fourth value, otherwise the order is 4
        and the knots start at the third value.  Every later line is stored in
        the knot array while it still fits completely into the remaining knot
        slots, and in the coefficient array otherwise.

        :param data_file: path to read; ``self.data_file`` is used when None
        :return: tuple ``(gt_out, tknts, l_max, bspl_order)`` where ``gt_out``
            is the coefficient array reshaped to (l_max*(l_max+2), nspl) in
            column-major order and ``tknts`` has nspl+bspl_order entries
        """
        path = self.data_file if data_file is None else data_file
        with open(path, 'rb') as handle:
            handle.readline()
            header = handle.readline().split()

            l_max = int(header[0])
            nspl = int(header[1])

            # A small third value is the spline order; a large one is already a knot.
            order_given = float(header[2]) < 1000
            if order_given:
                bspl_order = int(header[2])
                knot_start = 3
            else:
                bspl_order = 4
                knot_start = 2

            n = l_max * (l_max + 2)
            gt = _zeros(n * nspl)
            tknts = _zeros(nspl + bspl_order)

            header_knots = [float(tok) for tok in header[knot_start:]]
            knot_pos = len(header_knots)
            tknts[:knot_pos] = header_knots

            coeff_pos = 0
            for raw_line in handle:
                values = [float(tok) for tok in raw_line.split()]
                count = len(values)
                if knot_pos + count > len(tknts):
                    gt[coeff_pos:coeff_pos + count] = values
                    coeff_pos += count
                else:
                    tknts[knot_pos:knot_pos + count] = values
                    knot_pos += count
        return gt.reshape(n, nspl, order='F'), tknts, l_max, bspl_order
Exemple #9
0
def bspline(time, tknts, jorder=4):
    '''
    Locates time within the knot sequence and evaluates the jorder B-spline factors there.

    The knot index comes from interval(tknts, time).  The factors are then raised one
    order at a time from the single starting value 1.0, using the distances from time
    to the knots on either side of that index.

    Parameters
    ----------
    time:
        time to calculate
    tknts:
        array of time-knots
    jorder:
        order of b-splines

    Returns
    -------
    nleft:
        index of the time knot on the left of the interval (tknts[nleft] <= time <= tknts[nleft+1])
    spl:
        array of dimension jorder (default 4) containing the spline factors at time t.
    '''

    nleft = interval(tknts, time)

    n_levels = jorder - 1
    dist_left = _zeros(n_levels)
    dist_right = _zeros(n_levels)
    factors = _zeros(jorder)
    factors[0] = 1.0

    for level in range(n_levels):
        # distances from time to the next knot on the right / on the left
        dist_right[level] = tknts[nleft + level + 1] - time
        dist_left[level] = time - tknts[nleft - level]

        carry = 0.0
        for i in range(level + 1):
            mirror = level - i
            ratio = factors[i] / (dist_right[i] + dist_left[mirror])
            factors[i] = carry + dist_right[i] * ratio
            carry = dist_left[mirror] * ratio
        factors[level + 1] = carry
    return nleft, factors
Exemple #10
0
def bspline(time, tknts, jorder=4):
    '''
    Finds the knot interval containing time and computes the jorder B-spline factors for it.

    The knot index comes from interval(tknts, time).  Starting from the single value 1.0,
    each pass of the outer loop raises the order by one, using the distances from time
    to the knots to the right and to the left of that index.

    Parameters
    ----------
    time:
        time to calculate
    tknts:
        array of time-knots
    jorder:
        order of b-splines

    Returns
    -------
    nleft:
        index of the time knot on the left of the interval (tknts[nleft] <= time <= tknts[nleft+1])
    spl:
        array of dimension jorder (default 4) containing the spline factors at time t.
    '''

    nleft = interval(tknts, time)

    passes = jorder-1
    gap_left = _zeros(passes)
    gap_right = _zeros(passes)
    weights = _zeros(jorder)
    weights[0] = 1.0

    for p in range(passes):
        gap_right[p] = tknts[nleft+p+1] - time
        gap_left[p] = time - tknts[nleft-p]

        # sweep over the weights of the current order, carrying the left part forward
        held = 0.0
        for i in range(p+1):
            q = weights[i]/(gap_right[i]+gap_left[p-i])
            weights[i] = held + gap_right[i]*q
            held = gap_left[p-i]*q
        weights[p+1] = held
    return nleft, weights
Exemple #11
0
    def __init__(self, max_size, stride=8):
        """ Create an empty manager and allocate its backing buffers.

        Parameters
        -----------
        max_size : (:obj:`int`)
            The maximum number of vertices housed within this manager's batch.
        stride : (:obj:`int`)
            The element size of one vertex.
        """

        # Bookkeeping: no gaps recorded, no chunk stored, nothing written yet
        self._empty = []
        self._last = None
        self._index_last = 0
        self._max_size = max_size
        self._stride = stride

        # Local staging array of max_size * stride values, then the vertex
        # array / buffer object references sized from its byte count
        value_count = max_size * stride
        self._data = _zeros(value_count, dtype=FLOAT32)
        buffers = create_batch_buffer(self._data.nbytes, [3, 2, 3],
                                      GL_STATIC_DRAW)
        self._vao, self._vbo = buffers
Exemple #12
0
    def defrag_quick(self):
        """ Refactor the first empty chunk in the batch.

        Pops the first entry of ``self._empty``, walks forward through the
        chunks that follow it, shifts each visited chunk back by the length
        of the removed gap and rewrites the collected data starting at the
        removed gap's first index.

        Returns False when there is no recorded gap, True otherwise.
        """

        # Nothing to do when no gap is recorded
        if len(self._empty) == 0: return False

        # Pop the first gap and link the prev and next chunks together
        removed = self._empty.pop(0)
        # NOTE(review): get_next() is dereferenced here without a None check,
        # although the code below treats removed.get_next() as possibly None.
        # Only the backward link is updated in this method.
        removed.get_next().set_previous(removed.get_previous())
        rlength = removed.get_length()

        accum = []
        length = 0
        current = removed.get_next()
        # NOTE(review): the walk continues only while current.is_gap() is true;
        # confirm this is the intended condition (vs. "is not a gap").
        while current is not None and current.is_gap(
        ) and length < MemoryManager.MAX_REFACTOR:
            # Move the chunk back by the size of the removed gap and collect its data
            current.shift(-rlength)
            accum.append(current.get_data())
            # `length` accumulates chunk length divided by MAX_FLOAT_COUNT and is
            # compared against MAX_REFACTOR to bound the work done per call
            length += current.get_length() / MemoryChunk.MAX_FLOAT_COUNT
            current = current.get_next()

        if current is None:
            # Walked off the end of the chain: the stored range shrinks by rlength
            # TODO: Need to flatten the accumulated array
            self.__store_to_gpu(removed.get_index_first(), accum)
            self._index_last -= rlength
        else:

            # Stopped before the end: pad the rewritten range with rlength
            # values from _zeros
            accum.append(_zeros(rlength))
            self.__store_to_gpu(removed.get_index_first(), accum)
            if current.is_gap():
                # Next chunk is already a gap, so just make that gap bigger
                current.shrink(-rlength)
            else:
                # Add empty bubble at the end of the accumulated refactor data
                bubble = MemoryChunk.create_empty_chunk(
                    current.get_previous().get_index_last(), rlength)
                self._empty.insert(0, bubble)
                bubble.set_previous(current.get_previous())
                bubble.set_next(current)

        return True
Exemple #13
0
    def calculate_unhashed_fps(self,
                               draw_substructures=False,
                               image_directory='./images_substructures'):
        """Build unhashed (one column per substructure id) fingerprints.

        For every molecule in ``self.mols`` the Morgan fingerprint is
        computed with ``radius=self.max_radius`` and its bit information is
        collected.  Only entries whose first recorded radius (``v[0][1]``)
        is in ``self.radii`` are used.

        Parameters
        ----------
        draw_substructures
            when true, an image is written for every selected substructure
            id that is not yet a key of ``self.substructure_dictionary``
        image_directory
            directory for the images; created when it does not exist

        Side effects (nothing is returned):

        * ``self.substructures_smiles`` is updated with one SMILES per id
        * ``self.substructure_dictionary`` is merged, per molecule, with a
          dict mapping each id to ``[mol_index]`` via ``self._combine_dicts``
        * ``self.substructure_ids`` and ``self.columns_unhashed`` are set
        * ``self.fps_unhashed_binary`` / ``self.fps_unhashed_counts`` are set
          to int arrays of shape (len(self.mols), number of columns), unless
          no substructure id matches a column, in which case a message is
          printed and the method returns before assigning them

        NOTE(review): ``iteritems()`` and indexing ``info.values()[x]`` only
        work on Python 2.
        """
        # get the dictionary for the substructures
        idxs = []
        substr_ids = []
        counts = []
        for mol_index, mol in enumerate(self.mols):
            info = {}
            # The call fills `info`; the returned fingerprint `fp` is not used below
            fp = _GetMorganFingerprint(mol,
                                       radius=self.max_radius,
                                       bitInfo=info)
            substructure_dictionary = {
                k: [mol_index]
                for k, v in info.iteritems() if v[0][1] in self.radii
            }
            substr_ids.append(substructure_dictionary.keys())
            idxs.append([mol_index] * len(substructure_dictionary.keys()))
            # NOTE(review): counts follow the iteration order of `info`, while
            # substr_ids follow the order of `substructure_dictionary`; the two
            # are paired by position later, so this presumably relies on both
            # dicts enumerating the selected keys in the same order -- verify.
            counts.append([
                len(info.values()[x]) for x in _arange(0, len(info))
                if info.values()[x][0][1] in self.radii
            ])

            # get the smiles for the substructures
            amap = {}
            substructures_smiles = {
                k: [
                    _MolToSmiles(
                        _PathToSubmol(mol,
                                      _FindAtomEnvironmentOfRadiusN(
                                          mol, v[0][1], v[0][0]),
                                      atomMap=amap))
                ]
                for k, v in info.iteritems() if v[0][1] in self.radii
            }
            self.substructures_smiles.update(substructures_smiles)

            # generate the images for the substructures if required..
            if draw_substructures:
                if not _exists(image_directory):
                    _makedirs(image_directory)
                for k, v in info.iteritems():
                    # only ids not seen in previously merged molecules are drawn
                    if k not in self.substructure_dictionary.keys(
                    ) and v[0][1] in self.radii:
                        image_name = "%s/Molecule_%d_substr_%d.pdf" % (
                            image_directory, mol_index, k)
                        env = _FindAtomEnvironmentOfRadiusN(
                            mol, v[0][1], v[0][0])
                        amap = {}
                        # submol is built only to fill `amap`; the whole molecule
                        # is drawn with the atoms in `amap` highlighted
                        submol = _PathToSubmol(mol, env, atomMap=amap)
                        _MolToFile(mol,
                                   image_name,
                                   size=(300, 300),
                                   wedgeBonds=True,
                                   kekulize=True,
                                   highlightAtoms=amap.keys())

            # merge this molecule's ids into the instance-wide dictionary
            self.substructure_dictionary = self._combine_dicts(
                substructure_dictionary, self.substructure_dictionary)

        # flatten the per-molecule lists into parallel 1-D arrays
        idxs = _array([val for sublist in idxs for val in sublist])
        counts = _array([val for sublist in counts for val in sublist])
        substr_ids_flattened = [
            val for sublist in substr_ids for val in sublist
        ]
        substr_ids = _array(substr_ids_flattened)
        self.substructure_ids = substr_ids
        # columns: sorted unique ids, taken either from the molecules seen so far
        # or from the user-supplied reference key set
        if len(self.reference_substructure_keys) == 0:
            print(
                "No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints."
            )
            columns = _array(list(set(self.substructure_dictionary.keys())))
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)
        else:
            columns = _array(list(set(self.reference_substructure_keys)))
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)

        fps_unhashed_binary = _zeros((len(self.mols), dimensionality_unhashed),
                                     dtype=int)
        fps_unhashed_counts = _zeros((len(self.mols), dimensionality_unhashed),
                                     dtype=int)

        # removing the indices corresponding to the substructures in the test molecules not present in the references set of substructures..
        idxs = _array([
            idxs[x] for x in _arange(0, len(substr_ids))
            if substr_ids[x] in self.columns_unhashed
        ])
        counts = _array([
            counts[x] for x in _arange(0, len(substr_ids))
            if substr_ids[x] in self.columns_unhashed
        ])
        substr_ids = _array([
            substr_ids[x] for x in _arange(0, len(substr_ids))
            if substr_ids[x] in self.columns_unhashed
        ])
        # column position of every remaining substructure id
        mapping = _array([(substr_ids[x] == columns).nonzero()
                          for x in _arange(0, len(substr_ids))])
        mapping = mapping.flatten()
        if len(mapping) == 0:
            print(
                "There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules."
            )
            # early exit: the fps_unhashed_* attributes are not assigned
            return

        # scatter: row = molecule index, column = position of the id in `columns`
        fps_unhashed_binary[idxs, mapping] = _ones(len(counts))
        fps_unhashed_counts[idxs, mapping] = counts
        self.fps_unhashed_binary = fps_unhashed_binary
        self.fps_unhashed_counts = fps_unhashed_counts
Exemple #14
0
    def calculate_unhashed_fps(self,draw_substructures=False,image_directory='./images_substructures'): 
        """Build unhashed (one column per substructure id) fingerprints.

        For every molecule in ``self.mols`` the Morgan fingerprint is
        computed with ``radius=self.max_radius`` and its bit information is
        collected.  Only entries whose first recorded radius (``v[0][1]``)
        is in ``self.radii`` are used.

        Parameters
        ----------
        draw_substructures
            when true, an image is written for every selected substructure
            id that is not a key of ``self.substructure_dictionary``
        image_directory
            directory for the images; created when it does not exist

        Side effects (nothing is returned):

        * ``self.substructures_smiles`` is updated with one SMILES per id
        * ``self.substructure_dictionary`` gains, for every id, the list of
          molecule indices in which it was found (merged after the loop)
        * ``self.substructure_ids`` and ``self.columns_unhashed`` are set
        * ``self.fps_unhashed_binary`` / ``self.fps_unhashed_counts`` are set
          to int arrays of shape (len(self.mols), number of columns), unless
          no substructure id matches a column, in which case a message is
          printed and the method returns before assigning them

        NOTE(review): this block is Python 2 only (print statement,
        ``iteritems()``, indexing ``info.values()[x]``).
        """
        # get the dictionary for the substructures
        idxs = []
        substr_ids = []
        counts=[]
        substructure_dictionaries = []    
        for mol_index,mol in enumerate(self.mols):
            info={}
            # The call fills `info`; the returned fingerprint `fp` is not used below
            fp = _GetMorganFingerprint(mol,radius=self.max_radius,bitInfo=info)
            substructure_dictionary = {k:mol_index for k,v in info.iteritems() if v[0][1] in self.radii}
            substructure_dictionaries.append({k:mol_index for k,v in info.iteritems() if v[0][1] in self.radii})
            substr_ids.append(substructure_dictionary.keys())
            idxs.append([mol_index]*len(substructure_dictionary.keys()))
            # NOTE(review): counts follow the iteration order of `info`, while
            # substr_ids follow the order of `substructure_dictionary`; the two
            # are paired by position later, so this presumably relies on both
            # dicts enumerating the selected keys in the same order -- verify.
            counts.append([ len(info.values()[x]) for x in _arange(0,len(info)) if info.values()[x][0][1] in self.radii])
            
            # get the smiles for the substructures
            amap = {}
            substructures_smiles = {k:[_MolToSmiles(_PathToSubmol(mol,_FindAtomEnvironmentOfRadiusN(mol,v[0][1],v[0][0]),atomMap=amap))] for k,v in info.iteritems() if v[0][1] in self.radii}
            self.substructures_smiles.update(substructures_smiles)
            
            # generate the images for the substructures if required..
            if draw_substructures:
                if not _exists(image_directory):
                    _makedirs(image_directory)
                for k,v in info.iteritems():
                    # self.substructure_dictionary is only merged after this loop,
                    # so this test is against the state from before this call
                    if k not in self.substructure_dictionary.keys() and v[0][1] in self.radii:
                        image_name="%s/Molecule_%d_substr_%d.pdf"%(image_directory,mol_index,k)
                        env=_FindAtomEnvironmentOfRadiusN(mol,v[0][1],v[0][0])
                        amap={}
                        # submol is built only to fill `amap`; the whole molecule
                        # is drawn with the atoms in `amap` highlighted
                        submol=_PathToSubmol(mol,env,atomMap=amap)
                        _MolToFile(mol,image_name,size=(300,300),wedgeBonds=True,kekulize=True,highlightAtoms=amap.keys())
            
        #self.substructure_dictionary = self._combine_dicts(substructure_dictionary,self.substructure_dictionary)
        # merge every molecule's ids into the instance-wide dictionary:
        # id -> list of the molecule indices that contain it (no duplicates)
        for d in substructure_dictionaries:
             for k, v in d.iteritems():
               l=self.substructure_dictionary.setdefault(k,[])
               if v not in l:
                 l.append(v)
            
        # flatten the per-molecule lists into parallel 1-D arrays
        idxs = _array([val for sublist in idxs for val in sublist])
        counts = _array([val for sublist in counts for val in sublist])
        substr_ids_flattened = [val for sublist in substr_ids for val in sublist]
        substr_ids = _array(substr_ids_flattened)
        self.substructure_ids = substr_ids
        # columns: sorted ids, taken either from the molecules seen so far
        # or from the user-supplied reference keys (not de-duplicated here)
        if len(self.reference_substructure_keys)==0:
            print "No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints."
            columns = _array(list(set(self.substructure_dictionary.keys())))
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)
        else:
            columns = _array(self.reference_substructure_keys)
            columns = _sort(columns)
            self.columns_unhashed = columns
            dimensionality_unhashed = len(columns)
        
        fps_unhashed_binary = _zeros((len(self.mols),dimensionality_unhashed), dtype=int)
        fps_unhashed_counts = _zeros((len(self.mols),dimensionality_unhashed), dtype=int)

            
        # column position of every substructure id; ids that are not among the
        # columns give an empty match and are dropped by the size != 0 filters
        mapping = _array([(substr_ids[x]==columns).nonzero() for x in _arange(0,len(substr_ids))])
        mapping = mapping.flatten()
        idxs = _array([idxs[x] for x in _arange(0,len(mapping)) if mapping[x].size != 0])
        counts = _array([counts[x] for x in _arange(0,len(mapping)) if mapping[x].size != 0])
        mapping = _array([mapping[x] for x in _arange(0,len(mapping)) if mapping[x].size != 0])
        if len(mapping) == 0:
            print "There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules."
            # early exit: the fps_unhashed_* attributes are not assigned
            return
        
        # scatter: row = molecule index, column = position of the id in `columns`
        fps_unhashed_binary[idxs,mapping] = _ones(len(mapping))
        fps_unhashed_counts[idxs,mapping] = counts
        self.fps_unhashed_binary = fps_unhashed_binary
        self.fps_unhashed_counts = fps_unhashed_counts
Exemple #15
0
def zeros(*shape: int, dtype=np.float32) -> ndarray:
    """Return ``_zeros`` of the given dimensions, passed as separate arguments.

    ``zeros(2, 3)`` forwards the tuple ``(2, 3)``; ``dtype`` defaults to
    ``np.float32``.
    """
    dims = shape  # already a tuple thanks to *shape
    result = _zeros(dims, dtype)  # type: ignore
    return result
Exemple #16
0
def zeros(*shp, dtype="float64"):
    """Return ``_zeros`` of the given dimensions, passed as separate arguments.

    ``zeros(2, 3)`` forwards the tuple ``(2, 3)``; ``dtype`` is passed through
    by keyword.
    """
    dims = shp  # already a tuple thanks to *shp
    result = _zeros(dims, dtype=dtype)
    return result
Exemple #17
0
def zeros(*shp, dtype='float64'):
    '''Return ``_zeros`` for the dimensions given as separate positional arguments.

    ``zeros(2, 3)`` forwards the tuple ``(2, 3)``; ``dtype`` is passed through
    by keyword.
    '''
    options = {'dtype': dtype}
    return _zeros(shp, **options)
Exemple #18
0
    def __init__(self, width, height):
        """ Allocate the elevation storage: one ``_zeros`` entry per cell of a
        width-by-height grid, kept flat in ``self._elevations``.
        """
        cell_count = width * height
        self._elevations = _zeros(cell_count)