def read_gufm_all(filename=data_file):
    '''
    Reads a gufm1-format model file.

    Parameters
    ----------
    filename: path to the gufm1 data file (defaults to data_file)

    Returns
    -------
    gt_out: n x nspl array of raw spline coefficients
    tknts: array of time knots
    l_max: maximum spherical harmonic degree of the model
    nspl: number of B-splines
    '''
    with open(filename, 'rb') as f:
        f.readline()  # skip the header line
        line1 = f.readline().split()
        l_max = int(line1[0])
        nspl = int(line1[1])
        n = l_max * (l_max + 2)
        gt = _zeros(n * nspl)
        tknts = _zeros(nspl + 4)
        tknts[:3] = [float(x) for x in line1[2:]]
        ti = 3
        gi = 0
        for line in f:
            # fill the remaining time knots first, then the spline
            # coefficients, four values per line
            if ti + 4 <= len(tknts):
                tknts[ti:ti + 4] = [float(x) for x in line.split()]
                ti += 4
            else:
                gt[gi:gi + 4] = [float(x) for x in line.split()]
                gi += 4
    gt_out = gt.reshape(n, nspl, order='F')
    return gt_out, tknts, l_max, nspl

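# Usage sketch (not part of the original module): 'gufm1_data.dat' is a
# hypothetical placeholder for a gufm1-format coefficient file, and _zeros is
# assumed to be numpy.zeros imported under that alias.
gt_out, tknts, l_max, nspl = read_gufm_all('gufm1_data.dat')
print(gt_out.shape)          # (l_max * (l_max + 2), nspl): one row per coefficient, one column per spline
print(tknts[0], tknts[-1])   # first and last time knots of the model
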
def calculate_hashed_fps_counts(self, nBits):
    # count format
    fps_hashed_binary = _zeros((len(self.mols), nBits), dtype=int)
    fps_hashed_counts = _zeros((len(self.mols), nBits), dtype=int)
    for mol_index, mol in enumerate(self.mols):
        info = {}
        fp = _GetMorganFingerprint(mol, radius=self.max_radius, bitInfo=info)
        for key, val in info.iteritems():
            if val[0][1] in self.radii:  # check if the radius is in the selection
                fps_hashed_binary[mol_index, key % nBits] = 1
                fps_hashed_counts[mol_index, key % nBits] += len(val)
    self.fps_hashed_binary = fps_hashed_binary
    self.fps_hashed_counts = fps_hashed_counts

def calculate_gt_raw(gt, spl, nleft, l_max=14, jorder=4):
    '''
    Calculates the Gauss coefficients in raw ordering given the parameters
    calculated by interval() and bspline().

    Parameters
    ----------
    gt: raw data from gufm1 (n x nspl numpy array)
    spl: B-spline basis (jorder numpy array)
    nleft: index of the time knot to the left of the desired time
    l_max: spherical harmonic degree included in the model (default 14)
    jorder: order of the B-splines (default 4)

    Returns
    -------
    Gauss coefficients for the requested time in raw ordering.
    '''
    n = l_max * (l_max + 2)
    g_raw = _zeros(n)
    # each coefficient is a weighted sum of the jorder spline coefficients
    # surrounding the requested time
    for k in range(n):
        for j in range(jorder):
            g_raw[k] += spl[j] * gt[k, j + nleft - 4]
    return g_raw

def calculate_hashed_fps(self, nBits):
    # count format
    fps_hashed_binary = _zeros((len(self.mols), nBits), dtype=int)
    fps_hashed_counts = _zeros((len(self.mols), nBits), dtype=int)
    for mol_index, mol in enumerate(self.mols):
        info = {}
        fp = _GetMorganFingerprint(mol, radius=self.max_radius, bitInfo=info)
        for key, val in info.iteritems():
            if val[0][1] in self.radii:  # check if the radius is in the selection
                fps_hashed_binary[mol_index, key % nBits] = 1
                fps_hashed_counts[mol_index, key % nBits] += len(val)
    self.fps_hashed_binary = fps_hashed_binary
    self.fps_hashed_counts = fps_hashed_counts

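# Standalone sketch of the RDKit bitInfo structure the hashed-fingerprint loops
# above rely on (illustration only; the molecule and the nBits value are
# arbitrary): GetMorganFingerprint fills bitInfo with
# {feature_id: ((atom_index, radius), ...)}, so val[0][1] is the radius of the
# substructure and key % nBits folds the unbounded feature id into a
# fixed-width vector.
from rdkit import Chem
from rdkit.Chem import AllChem

example_mol = Chem.MolFromSmiles('CCO')
bit_info = {}
AllChem.GetMorganFingerprint(example_mol, 2, bitInfo=bit_info)
for key, val in bit_info.items():
    print(key % 1024, val[0][1], len(val))   # folded bit, radius, occurrence count
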
def generate(self, xoffset, zoffset, xsize, zsize, xscale, zscale):
    size = xsize * zsize
    elevs = _zeros(xsize * zsize)
    positions = empty(size * 6, dtype=Vector3f)
    normals = _zeros(size * 6, dtype=Vector3f._UNIT)
    colors = empty(size, dtype=Vector3f)
    index = 0
    for iz in xrange(zsize):
        for ix in xrange(xsize):
            # Determine the positions of the quad
            p1 = Vector3f(xoffset + ix, 0, zoffset + iz)
            p2 = Vector3f(xoffset + ix, 0, zoffset + iz + 1)
            p3 = Vector3f(xoffset + ix + 1, 0, zoffset + iz + 1)
            p4 = Vector3f(xoffset + ix + 1, 0, zoffset + iz)
            positions[index:index + 6] = [p1, p2, p4, p4, p2, p3]

def _read_data(self, data_file=None):
    """
    Read the model coefficients and time knots from a gufm-format data file.

    :param data_file: path to the data file; defaults to self.data_file
    :return: gt_out (n x nspl array of raw spline coefficients), tknts
        (array of time knots), l_max (maximum spherical harmonic degree),
        bspl_order (order of the B-splines)
    """
    if data_file is None:
        data_file = self.data_file
    with open(data_file, 'rb') as f:
        f.readline()  # skip the header line
        line1 = f.readline().split()
        l_max = int(line1[0])
        nspl = int(line1[1])
        # If the third entry is small it is taken to be the B-spline order;
        # otherwise the order defaults to 4 and the entry is the first time knot.
        if float(line1[2]) < 1000:
            bspl_order = int(line1[2])
            l1_tknt_loc = 3
        else:
            bspl_order = 4
            l1_tknt_loc = 2
        n = l_max * (l_max + 2)
        gt = _zeros(n * nspl)
        tknts = _zeros(nspl + bspl_order)
        tknt_l1 = [float(x) for x in line1[l1_tknt_loc:]]
        tknts[:len(tknt_l1)] = tknt_l1
        ti = len(tknt_l1)
        gi = 0
        for line in f:
            l_tmp = [float(x) for x in line.split()]
            nl = len(l_tmp)
            # fill the remaining time knots first, then the spline coefficients
            if ti + nl <= len(tknts):
                tknts[ti:ti + nl] = l_tmp
                ti += nl
            else:
                gt[gi:gi + nl] = l_tmp
                gi += nl
    gt_out = gt.reshape(n, nspl, order='F')
    return gt_out, tknts, l_max, bspl_order

def bspline(time, tknts, jorder=4):
    '''
    Calculates B-spline and time knot index location for time t.

    Parameters
    ----------
    time: time to calculate
    tknts: array of time-knots
    jorder: order of b-splines

    Returns
    -------
    nleft: index of the time knot on the left of the interval
        (tknts[nleft] <= time <= tknts[nleft+1])
    spline: array of dimension jorder (default 4) containing the spline
        factors at time t.
    '''
    nleft = interval(tknts, time)
    deltal = _zeros(jorder - 1)
    deltar = _zeros(jorder - 1)
    spline = _zeros(jorder)
    spline[0] = 1.0
    # de Boor recursion: build up the jorder non-zero spline values at time t
    for j in range(jorder - 1):
        deltar[j] = tknts[nleft + j + 1] - time
        deltal[j] = time - tknts[nleft - j]
        saved = 0.0
        for i in range(j + 1):
            term = spline[i] / (deltar[i] + deltal[j - i])
            spline[i] = saved + deltar[i] * term
            saved = deltal[j - i] * term
        spline[j + 1] = saved
    return nleft, spline

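# Sketch of how the pieces above fit together (assumes interval() is available
# and returns nleft with tknts[nleft] <= time <= tknts[nleft + 1], and that a
# gufm1-format file exists at the hypothetical path used earlier):
gt_out, tknts, l_max, nspl = read_gufm_all('gufm1_data.dat')
nleft, spl = bspline(1900.0, tknts)                          # cubic B-spline weights at 1900 AD
assert abs(spl.sum() - 1.0) < 1e-9                           # the weights form a partition of unity
g_raw = calculate_gt_raw(gt_out, spl, nleft, l_max=l_max)    # Gauss coefficients at 1900 AD
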
def __init__(self, max_size, stride=8):
    """
    Constructor.

    Parameters
    -----------
    max_size : (:obj:`int`) The maximum number of vertices housed within
        this manager's batch.
    stride : (:obj:`int`) The element size of one vertex.
    """
    self._max_size = max_size
    self._stride = stride
    self._empty = []
    self._last = None
    self._index_last = 0
    # GPU vertex array, buffer object references
    self._data = _zeros(max_size * stride, dtype=FLOAT32)
    self._vao, self._vbo = create_batch_buffer(self._data.nbytes,
                                               [3, 2, 3], GL_STATIC_DRAW)

def defrag_quick(self):
    """
    Refactor the first empty chunk in the batch.
    """
    if len(self._empty) == 0:
        return False
    # Pop the first gap and link the prev and next chunks together
    removed = self._empty.pop(0)
    removed.get_next().set_previous(removed.get_previous())
    rlength = removed.get_length()
    accum = []
    length = 0
    current = removed.get_next()
    while current is not None and current.is_gap() \
            and length < MemoryManager.MAX_REFACTOR:
        current.shift(-rlength)
        accum.append(current.get_data())
        length += current.get_length() / MemoryChunk.MAX_FLOAT_COUNT
        current = current.get_next()
    if current is None:
        # TODO: Need to flatten the accumulated array
        self.__store_to_gpu(removed.get_index_first(), accum)
        self._index_last -= rlength
    else:
        accum.append(_zeros(rlength))
        self.__store_to_gpu(removed.get_index_first(), accum)
        if current.is_gap():
            # Next chunk is already a gap, so just make that gap bigger
            current.shrink(-rlength)
        else:
            # Add empty bubble at the end of the accumulated refactor data
            bubble = MemoryChunk.create_empty_chunk(
                current.get_previous().get_index_last(), rlength)
            self._empty.insert(0, bubble)
            bubble.set_previous(current.get_previous())
            bubble.set_next(current)
    return True

def calculate_unhashed_fps(self,
                           draw_substructures=False,
                           image_directory='./images_substructures'):
    # get the dictionary for the substructures
    idxs = []
    substr_ids = []
    counts = []
    for mol_index, mol in enumerate(self.mols):
        info = {}
        fp = _GetMorganFingerprint(mol, radius=self.max_radius, bitInfo=info)
        substructure_dictionary = {k: [mol_index]
                                   for k, v in info.iteritems()
                                   if v[0][1] in self.radii}
        substr_ids.append(substructure_dictionary.keys())
        idxs.append([mol_index] * len(substructure_dictionary.keys()))
        counts.append([len(info.values()[x])
                       for x in _arange(0, len(info))
                       if info.values()[x][0][1] in self.radii])

        # get the smiles for the substructures
        amap = {}
        substructures_smiles = {
            k: [_MolToSmiles(_PathToSubmol(
                mol,
                _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0]),
                atomMap=amap))]
            for k, v in info.iteritems() if v[0][1] in self.radii
        }
        self.substructures_smiles.update(substructures_smiles)

        # generate the images for the substructures if required..
        if draw_substructures:
            if not _exists(image_directory):
                _makedirs(image_directory)
            for k, v in info.iteritems():
                if k not in self.substructure_dictionary.keys() and v[0][1] in self.radii:
                    image_name = "%s/Molecule_%d_substr_%d.pdf" % (image_directory, mol_index, k)
                    env = _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0])
                    amap = {}
                    submol = _PathToSubmol(mol, env, atomMap=amap)
                    _MolToFile(mol, image_name, size=(300, 300), wedgeBonds=True,
                               kekulize=True, highlightAtoms=amap.keys())

        self.substructure_dictionary = self._combine_dicts(
            substructure_dictionary, self.substructure_dictionary)

    idxs = _array([val for sublist in idxs for val in sublist])
    counts = _array([val for sublist in counts for val in sublist])
    substr_ids_flattened = [val for sublist in substr_ids for val in sublist]
    substr_ids = _array(substr_ids_flattened)
    self.substructure_ids = substr_ids

    if len(self.reference_substructure_keys) == 0:
        print("No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints.")
        columns = _array(list(set(self.substructure_dictionary.keys())))
        columns = _sort(columns)
        self.columns_unhashed = columns
        dimensionality_unhashed = len(columns)
    else:
        columns = _array(list(set(self.reference_substructure_keys)))
        columns = _sort(columns)
        self.columns_unhashed = columns
        dimensionality_unhashed = len(columns)

    fps_unhashed_binary = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)
    fps_unhashed_counts = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)

    # removing the indices corresponding to the substructures in the test
    # molecules not present in the reference set of substructures..
    idxs = _array([idxs[x] for x in _arange(0, len(substr_ids))
                   if substr_ids[x] in self.columns_unhashed])
    counts = _array([counts[x] for x in _arange(0, len(substr_ids))
                     if substr_ids[x] in self.columns_unhashed])
    substr_ids = _array([substr_ids[x] for x in _arange(0, len(substr_ids))
                         if substr_ids[x] in self.columns_unhashed])
    mapping = _array([(substr_ids[x] == columns).nonzero()
                      for x in _arange(0, len(substr_ids))])
    mapping = mapping.flatten()
    if len(mapping) == 0:
        print("There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules.")
        return

    fps_unhashed_binary[idxs, mapping] = _ones(len(counts))
    fps_unhashed_counts[idxs, mapping] = counts
    self.fps_unhashed_binary = fps_unhashed_binary
    self.fps_unhashed_counts = fps_unhashed_counts

def calculate_unhashed_fps(self, draw_substructures=False,
                           image_directory='./images_substructures'):
    # get the dictionary for the substructures
    idxs = []
    substr_ids = []
    counts = []
    substructure_dictionaries = []
    for mol_index, mol in enumerate(self.mols):
        info = {}
        fp = _GetMorganFingerprint(mol, radius=self.max_radius, bitInfo=info)
        substructure_dictionary = {k: mol_index for k, v in info.iteritems()
                                   if v[0][1] in self.radii}
        substructure_dictionaries.append({k: mol_index for k, v in info.iteritems()
                                          if v[0][1] in self.radii})
        substr_ids.append(substructure_dictionary.keys())
        idxs.append([mol_index] * len(substructure_dictionary.keys()))
        counts.append([len(info.values()[x]) for x in _arange(0, len(info))
                       if info.values()[x][0][1] in self.radii])

        # get the smiles for the substructures
        amap = {}
        substructures_smiles = {
            k: [_MolToSmiles(_PathToSubmol(
                mol,
                _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0]),
                atomMap=amap))]
            for k, v in info.iteritems() if v[0][1] in self.radii}
        self.substructures_smiles.update(substructures_smiles)

        # generate the images for the substructures if required..
        if draw_substructures:
            if not _exists(image_directory):
                _makedirs(image_directory)
            for k, v in info.iteritems():
                if k not in self.substructure_dictionary.keys() and v[0][1] in self.radii:
                    image_name = "%s/Molecule_%d_substr_%d.pdf" % (image_directory, mol_index, k)
                    env = _FindAtomEnvironmentOfRadiusN(mol, v[0][1], v[0][0])
                    amap = {}
                    submol = _PathToSubmol(mol, env, atomMap=amap)
                    _MolToFile(mol, image_name, size=(300, 300), wedgeBonds=True,
                               kekulize=True, highlightAtoms=amap.keys())

    #self.substructure_dictionary = self._combine_dicts(substructure_dictionary,self.substructure_dictionary)
    for d in substructure_dictionaries:
        for k, v in d.iteritems():
            l = self.substructure_dictionary.setdefault(k, [])
            if v not in l:
                l.append(v)

    idxs = _array([val for sublist in idxs for val in sublist])
    counts = _array([val for sublist in counts for val in sublist])
    substr_ids_flattened = [val for sublist in substr_ids for val in sublist]
    substr_ids = _array(substr_ids_flattened)
    self.substructure_ids = substr_ids
    if len(self.reference_substructure_keys) == 0:
        print "No input set of keys for the substructures. \nThus, the substructures present in the input molecules will be considered for the calculation of unhashed fingerprints."
        columns = _array(list(set(self.substructure_dictionary.keys())))
        columns = _sort(columns)
        self.columns_unhashed = columns
        dimensionality_unhashed = len(columns)
    else:
        columns = _array(self.reference_substructure_keys)
        columns = _sort(columns)
        self.columns_unhashed = columns
        dimensionality_unhashed = len(columns)

    fps_unhashed_binary = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)
    fps_unhashed_counts = _zeros((len(self.mols), dimensionality_unhashed), dtype=int)

    mapping = _array([(substr_ids[x] == columns).nonzero()
                      for x in _arange(0, len(substr_ids))])
    mapping = mapping.flatten()
    idxs = _array([idxs[x] for x in _arange(0, len(mapping)) if mapping[x].size != 0])
    counts = _array([counts[x] for x in _arange(0, len(mapping)) if mapping[x].size != 0])
    mapping = _array([mapping[x] for x in _arange(0, len(mapping)) if mapping[x].size != 0])
    if len(mapping) == 0:
        print "There is no intersection between the substructures \n(i)provided in the reference key set, and\n(ii) the substructures found in the input molecules."
        return

    fps_unhashed_binary[idxs, mapping] = _ones(len(mapping))
    fps_unhashed_counts[idxs, mapping] = counts
    self.fps_unhashed_binary = fps_unhashed_binary
    self.fps_unhashed_counts = fps_unhashed_counts

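# Standalone sketch of the unhashed-fingerprint idea used above (illustration
# only; the molecules, names, and radius are arbitrary): every retained Morgan
# substructure id gets its own column instead of being folded modulo nBits,
# so distinct substructures can never collide.
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem

example_mols = [Chem.MolFromSmiles(s) for s in ('CCO', 'CCN', 'c1ccccc1O')]
bit_infos = []
for m in example_mols:
    bi = {}
    AllChem.GetMorganFingerprint(m, 2, bitInfo=bi)
    bit_infos.append(bi)
columns = sorted({k for bi in bit_infos for k in bi})      # one column per substructure id
counts = np.zeros((len(example_mols), len(columns)), dtype=int)
for i, bi in enumerate(bit_infos):
    for k, v in bi.items():
        counts[i, columns.index(k)] = len(v)               # occurrences of that substructure
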
def zeros(*shape: int, dtype=np.float32) -> ndarray:
    return _zeros(shape, dtype)  # type: ignore

def zeros(*shp, dtype="float64"): return _zeros(shp, dtype=dtype)
def __init__(self, width, height):
    # one elevation value per grid cell, stored as a flat array
    self._elevations = _zeros(width * height)