def calculate_clusters_eden(collection):
    """Cluster the compounds of ``collection`` with EDeN and save each cluster.

    The compounds whose ``compound_group`` equals ``collection`` are written
    to a temporary SDF file, converted to gspan and handed to
    ``EdenUtil.cluster``.  Every non-blank line of the result is read as one
    cluster: a whitespace-separated list of integer positions into the
    ``compounds`` query set.  The first position of a line is used both for
    the cluster title and as the cluster's ``centriod``.

    Args:
        collection: value matched against ``Compound.compound_group`` and
            stored on every created ``Cluster``.

    Returns:
        The list of ``Cluster`` objects that were created and saved, in the
        order they appear in the EDeN output.
    """
    compounds = Compound.objects(compound_group=collection)

    # call EdenUtil Cluster
    sdf_path = CompoundCollection.generateTmpSDF(compounds, 'tmpcollection').name
    clusters_file = EdenUtil.cluster(EdenUtil.sdf_to_gspan(sdf_path))

    clusters = []
    try:
        # create a cluster for each line
        for eden_cluster in clusters_file:
            # split() without an argument drops empty tokens and the line
            # ending, so no stray '\n' ends up in the title.
            eden_nodes = eden_cluster.split()
            if not eden_nodes:
                # A blank line carries no cluster; indexing it would raise.
                continue
            cluster = Cluster(
                title="Cluster_" + str(eden_nodes[0]),
                collection=collection,
                nodes=[compounds[int(node)] for node in eden_nodes],
                centriod=compounds[int(eden_nodes[0])],
                density=0.0,
                Color="#EF0000")
            clusters.append(cluster)
            cluster.save()
    finally:
        # Release the result file even when building/saving a cluster fails.
        clusters_file.close()
    return clusters
def calc_new_mol(request):
    """AJAX view that designs a new molecule from the selected compounds.

    ``selected_compounds`` is read from the query string and used to filter
    ``Compound`` by ``name__in``.  The matches are written to a temporary SDF
    file, converted to gspan and passed to ``EdenUtil.molcule_design``.

    Args:
        request: the incoming Django request.

    Returns:
        An ``HttpResponse`` carrying JSON.  For AJAX requests the body holds
        ``new_comp``, ``result`` and ``message``.  Any other request gets a
        400 response describing the problem, so the view always returns a
        response object.
    """
    if not request.is_ajax():
        error_data = {
            'result': 'Error',
            'message': 'This endpoint only accepts AJAX requests',
        }
        return HttpResponse(json.dumps(error_data),
                            mimetype="application/json", status=400)

    # NOTE(review): GET[...] yields a single value; if the client sends the
    # parameter several times, GET.getlist() may be what is wanted -- confirm.
    selected_compounds = request.GET["selected_compounds"]
    compounds = Compound.objects({"name__in": selected_compounds})

    tmpsdffile = CompoundCollection.generateTmpSDF(compounds, "tmpsdf")
    new_comp = EdenUtil.molcule_design(EdenUtil.sdf_to_gspan(tmpsdffile.name))

    response_data = {}
    response_data["new_comp"] = new_comp
    response_data['result'] = 'Success'
    response_data['message'] = 'New Molcule Designed'
    return HttpResponse(json.dumps(response_data), mimetype="application/json")
def distance_matrix_cluster_centriods(self):
    """Compute and store the EDeN distance matrix of the cluster centroids.

    The centroids returned by ``Cluster.get_clusters_centriod`` for
    ``self.compound_group`` are written to a temporary SDF file and converted
    to gspan.  ``EdenUtil.similarty_matrix`` is evaluated on that file and
    ``1 - similarity`` is written to ``self.data``.  The document is then
    saved (without validation or cascade) and ``calc_knn_eden`` is run.

    The interactive ``pdb.set_trace()`` breakpoint that used to sit in this
    code path has been removed: it suspends the calling process until a
    debugger is attended to.
    """
    # 1- generate an SDF file of the clusters' centroids
    centroids = Cluster.get_clusters_centriod(self.compound_group)
    tmpsdffile = CompoundCollection.generateTmpSDF(centroids, 'centriods')

    # 2- calculate the distance matrix for the centroids
    sm_output = EdenUtil.similarty_matrix(
        EdenUtil.sdf_to_gspan(tmpsdffile.name))
    distances = 1 - sm_output

    # 3- persist the raw matrix bytes, then derive the nearest neighbours
    self.data.new_file()
    self.data.write(distances.tostring())
    self.data.close()
    self.save(validate=False, cascade=False)
    self.calc_knn_eden()
def distance_matrix_cluster_centriods(self):
    """Store the distance matrix between cluster centroids and refresh KNN.

    Steps performed:

    1. ``Cluster.get_clusters_centriod(self.compound_group)`` is dumped to a
       temporary SDF file via ``CompoundCollection.generateTmpSDF``.
    2. The file is converted to gspan and ``EdenUtil.similarty_matrix`` is
       evaluated on it; the stored matrix is ``1 - similarity``.
    3. The matrix bytes are written to ``self.data``, the document is saved
       with ``validate=False, cascade=False`` and ``calc_knn_eden`` is called.

    A leftover ``pdb.set_trace()`` call between steps 1 and 2 was dropped; it
    halted execution waiting for an interactive debugger.
    """
    # 1- generate an SDF file of the clusters' centroids
    tmpsdffile = CompoundCollection.generateTmpSDF(
        Cluster.get_clusters_centriod(self.compound_group), 'centriods')

    # 2- calculate the distance matrix for the centroids
    gspan_file = EdenUtil.sdf_to_gspan(tmpsdffile.name)
    distances = 1 - EdenUtil.similarty_matrix(gspan_file)

    # 3- persist the result and compute the nearest neighbours
    self.data.new_file()
    self.data.write(distances.tostring())
    self.data.close()
    self.save(validate=False, cascade=False)
    self.calc_knn_eden()
def PerformPCA(self, collection=None, feature_dim=15, use_sparse=True):
    """Reduce the EDeN feature vectors of ``collection`` to three components.

    The compounds are written to a temporary SDF file, converted to gspan and
    passed to ``EdenUtil.feature``.  Each row of the feature output is read
    as a leading token (skipped) followed by ``index:value`` tokens, which
    are placed into a vector of ``2 ** feature_dim`` columns.

    Args:
        collection: compounds to analyse.  When ``None``, every node of every
            cluster in ``self.cluster`` is used.
        feature_dim: forwarded to ``EdenUtil.feature``; also determines the
            width of the feature space.  Defaults to the previously
            hard-coded 15.
        use_sparse: when true (the previous fixed behaviour) the features go
            into a ``lil_matrix`` and ``RandomizedPCA`` is used.  When false,
            dense lists are built, a ``Feature`` document is saved per
            compound and plain ``PCA`` is used.

    Returns:
        The result of ``fit_transform`` with ``n_components=3``.
    """
    if collection is None:
        collection = [node for c in self.cluster for node in c.nodes]

    # Generate gspan for the cluster nodes and compute their features.
    tmpsdffile = CompoundCollection.generateTmpSDF(collection, 'cluster')
    feature_file = EdenUtil.feature(
        EdenUtil.sdf_to_gspan(tmpsdffile.name), feature_dim)
    n_features = 2 ** feature_dim

    if use_sparse:
        sparse_mtx = lil_matrix((len(collection), n_features))
        for row_indx, row in enumerate(feature_file):
            # split() tolerates repeated blanks and the trailing newline,
            # which split(" ") turned into unparsable empty tokens.
            for fet in row.split()[1:]:
                parts = fet.split(":")
                sparse_mtx[row_indx, int(parts[0])] = float(parts[1])
        pca = sklearn.decomposition.RandomizedPCA(n_components=3)
        return pca.fit_transform(sparse_mtx)

    feature_vectors = []
    for i, row in enumerate(feature_file):
        feat_vect = [0] * n_features
        for fet in row.split()[1:]:
            parts = fet.split(":")
            feat_vect[int(parts[0])] = float(parts[1])
        compound_feature = Feature(compound=collection[i], features=feat_vect)
        compound_feature.save()
        feature_vectors.append(feat_vect)

    # Perform PCA on the dense feature vectors.
    pca = sklearn.decomposition.PCA(n_components=3)
    return pca.fit_transform(feature_vectors)
def calc_knn_eden(self):
    """Compute the EDeN nearest neighbours and save one ``KnnItem`` per entry.

    The compounds of ``self.compound_group`` are written to a temporary SDF
    file, converted to gspan and passed to ``EdenUtil.nearest_neighbor``.
    For each returned entry the first element of ``indxs`` is removed and
    used as the item's own ``mol_id``; the remaining indices become its
    neighbours.
    """
    # Calculate and Store KNN
    print("\n Calculating KNN ... \n")
    compounds = Compound.objects(compound_group=self.compound_group)
    sdf_path = CompoundCollection.generateTmpSDF(compounds, "comps_eden").name
    knn = EdenUtil.nearest_neighbor(EdenUtil.sdf_to_gspan(sdf_path))

    for knn_entry in knn:
        indices = knn_entry['indxs']
        # pop(0) mutates the entry: afterwards ``indices`` holds neighbours only.
        owner_name = str(compounds[int(indices.pop(0))].name)

        # NOTE(review): ``indices.index(idx)`` is a position in the already
        # shortened list, so ``knn_vals`` is read from its element 0 onwards
        # -- confirm that ``knn_vals`` is aligned that way.
        neighbors = []
        for idx in indices:
            neighbors.append({
                "mol_id": str(compounds[int(idx)].name),
                "val": float(knn_entry['knn_vals'][indices.index(idx)])
            })

        knn_item = KnnItem(simmatrix=self,
                           mol_id=owner_name,
                           neighbors=neighbors)
        knn_item.save()
def calc_knn_eden(self):
    """Store the nearest neighbours of every compound in the group.

    ``EdenUtil.nearest_neighbor`` is run on the gspan conversion of a
    temporary SDF dump of the group's compounds.  Each result entry yields a
    ``KnnItem`` linked to this object through ``simmatrix``: the first value
    of ``indxs`` names the molecule, the rest are its neighbours, each paired
    with a value looked up in ``knn_vals``.
    """
    # Calculate and Store KNN
    print("\n Calculating KNN ... \n")
    compounds = Compound.objects(compound_group=self.compound_group)
    tmpsdffile = CompoundCollection.generateTmpSDF(compounds, "comps_eden")
    gspan_file = EdenUtil.sdf_to_gspan(tmpsdffile.name)

    for knn_entry in EdenUtil.nearest_neighbor(gspan_file):
        remaining = knn_entry['indxs']
        # Removing the head in place leaves only the neighbour indices.
        mol_name = str(compounds[int(remaining.pop(0))].name)

        # NOTE(review): the value index comes from the shortened list, so the
        # first neighbour reads ``knn_vals[0]`` -- verify this alignment.
        neighbor_docs = [
            {
                "mol_id": str(compounds[int(x)].name),
                "val": float(knn_entry['knn_vals'][remaining.index(x)]),
            }
            for x in remaining
        ]

        KnnItem(simmatrix=self, mol_id=mol_name, neighbors=neighbor_docs).save()
def distance_matrix_eden(self):
    """Compute and store the EDeN distance matrix of the whole group.

    The compounds of ``self.compound_group`` are written to a temporary SDF
    file and converted to gspan.  ``EdenUtil.similarty_matrix`` is evaluated
    on it and ``1 - similarity`` is written to ``self.data``.  The document
    is then saved without validation or cascade and ``calc_knn_eden`` runs.

    The earlier version first counted the compounds and allocated an n-by-n
    matrix of ones that was overwritten before use; that extra query and
    quadratic allocation are gone.
    """
    comps = Compound.objects(compound_group=self.compound_group)
    tmpsdffile = CompoundCollection.generateTmpSDF(comps, "comps_eden")

    # Calculate distances using EDeN
    sm_output = EdenUtil.similarty_matrix(
        EdenUtil.sdf_to_gspan(tmpsdffile.name))
    distances = 1 - sm_output

    # Persist the raw matrix bytes, then derive the nearest neighbours.
    self.data.new_file()
    self.data.write(distances.tostring())
    self.data.close()
    self.save(validate=False, cascade=False)
    self.calc_knn_eden()
def distance_matrix_eden(self):
    """Store ``1 - similarity`` for all compounds of the group, then run KNN.

    Steps performed:

    1. The compounds whose ``compound_group`` is ``self.compound_group`` are
       dumped to a temporary SDF file.
    2. Its gspan conversion is passed to ``EdenUtil.similarty_matrix`` and
       the result is turned into distances.
    3. The distance bytes are written to ``self.data``, the document is saved
       with ``validate=False, cascade=False`` and ``calc_knn_eden`` is called.

    A placeholder ``numpy.ones((n, n))`` matrix (and the ``count()`` query
    feeding it) was removed because it was discarded before being read.
    """
    comps = Compound.objects(compound_group=self.compound_group)
    tmpsdffile = CompoundCollection.generateTmpSDF(comps, "comps_eden")

    # Calculate distances using EDeN
    gspan_file = EdenUtil.sdf_to_gspan(tmpsdffile.name)
    distances = 1 - EdenUtil.similarty_matrix(gspan_file)

    self.data.new_file()
    self.data.write(distances.tostring())
    self.data.close()
    self.save(validate=False, cascade=False)
    self.calc_knn_eden()
def calculate_clusters_eden(collection):
    """Run EDeN clustering on ``collection`` and persist the clusters.

    A temporary SDF dump of the collection's compounds is converted to gspan
    and given to ``EdenUtil.cluster``.  The result is iterated line by line;
    each non-blank line lists integer positions into the ``compounds`` query
    set that form one cluster, the first position also serving as the
    cluster's ``centriod`` and as the suffix of its title.

    Args:
        collection: value matched against ``Compound.compound_group`` and
            assigned to each new ``Cluster``.

    Returns:
        The saved ``Cluster`` objects, in output order.
    """
    compounds = Compound.objects(compound_group=collection)

    # call EdenUtil Cluster
    tmpsdffile = CompoundCollection.generateTmpSDF(compounds, 'tmpcollection')
    clusters_file = EdenUtil.cluster(EdenUtil.sdf_to_gspan(tmpsdffile.name))

    clusters = []
    try:
        # create a cluster for each line
        for eden_cluster in clusters_file:
            # Whitespace splitting discards empty tokens and the newline.
            eden_nodes = eden_cluster.split()
            if not eden_nodes:
                # Skip blank lines instead of failing on eden_nodes[0].
                continue
            centroid_index = eden_nodes[0]
            cluster = Cluster(
                title="Cluster_" + str(centroid_index),
                collection=collection,
                nodes=[compounds[int(node)] for node in eden_nodes],
                centriod=compounds[int(centroid_index)],
                density=0.0,
                Color="#EF0000")
            clusters.append(cluster)
            cluster.save()
    finally:
        # Close the result file even if a cluster could not be built or saved.
        clusters_file.close()
    return clusters
def simialrity_matrix(self):
    """Call ``EdenUtil.simialrity_matrix`` on this object's ``nodes``.

    NOTE(review): the computed value is neither returned nor stored, so
    callers always receive ``None`` -- confirm whether a ``return`` is
    intended here.
    """
    compute = EdenUtil.simialrity_matrix
    compute(self.nodes)
def distance_matrix_cluster(self, cluster):
    """Evaluate the EDeN similarity matrix of every cluster in the group.

    Args:
        cluster: currently unused.  It is kept so existing callers keep
            working; the loop used to reuse this name and overwrite it.

    The cluster query was only present as a commented-out line, which left
    ``clusters`` undefined and made the loop fail with ``NameError``.  It is
    restored here.

    NOTE(review): each matrix is computed and then dropped -- nothing is
    stored or returned.  Confirm what should happen with the results.
    """
    clusters = Cluster.objects(collection=self.compound_group)
    for current in clusters:
        data = EdenUtil.simialrity_matrix(current.nodes)
def distance_matrix_cluster(self, cluster):
    """Compute a similarity matrix for each cluster of ``self.compound_group``.

    Args:
        cluster: not read by this method; retained for signature
            compatibility.  The loop variable no longer shadows it.

    ``clusters`` was referenced without being defined (its query existed only
    as a comment), so any call raised ``NameError``.  The query is now
    executed.

    NOTE(review): the per-cluster result of ``EdenUtil.simialrity_matrix`` is
    discarded; verify whether it should be saved or returned.
    """
    clusters = Cluster.objects(collection=self.compound_group)
    for member in clusters:
        data = EdenUtil.simialrity_matrix(member.nodes)
def calc_pos(request):
    """Compute and store position results (MDS or PCA) for a collection.

    POST fields read: ``method``, ``collection_id`` and ``pos_title``.

    * ``pre_cluster_eden_pca``: clusters are computed if none exist for the
      collection, a ``KnnItem`` is saved per nearest-neighbour entry, and a
      ``PCARes`` is stored with the PCA of the cluster centroids.
    * ``FP``, ``eden``, ``pre_cluster_eden``: a ``SimilarityMatrix`` is built
      with the matching method, then an ``MDSRes`` is stored with the MDS of
      that matrix.

    Args:
        request: the incoming Django request.

    Returns:
        An ``HttpResponseRedirect`` to ``/space_explorer/``.  Non-POST
        requests are redirected without doing any work.

    Raises:
        ValueError: if ``method`` is none of the values listed above.
            Previously this case failed later on an unbound variable.
    """
    if request.method != 'POST':
        return HttpResponseRedirect('/space_explorer/')

    calc_method = str(request.POST["method"])
    cg = CompoundCollection.objects().with_id(str(request.POST['collection_id']))
    # Read up front so a missing field fails before the expensive computation.
    pos_title = str(request.POST['pos_title'])

    if calc_method == "pre_cluster_eden_pca":
        # Calculate PCA
        if len(Cluster.objects(collection=cg)) < 1:
            print("\n Calculating Clusters ... \n ")
            Cluster.calculate_clusters_eden(cg)

        # Calculate and Store KNN
        compounds = Compound.objects(compound_group=cg)
        tmpsdffile = CompoundCollection.generateTmpSDF(compounds, "comps_eden")
        print("\n Calculating KNN ... \n")
        knn = EdenUtil.nearest_neighbor(EdenUtil.sdf_to_gspan(tmpsdffile.name))
        for knn_entry in knn:
            indices = knn_entry['indxs']
            # The first index names the molecule itself; pop(0) removes it so
            # only neighbour indices remain in ``indices``.
            mol_name = str(compounds[int(indices.pop(0))].name)
            knn_item = KnnItem(
                compound_group=cg,
                mol_id=mol_name,
                neighbors=[{
                    "mol_id": str(compounds[int(x)].name),
                    "val": float(knn_entry['knn_vals'][indices.index(x)])
                } for x in indices])
            knn_item.save()

        print("\n Calculating PCA ... \n ")
        pca = PCARes(compound_group=cg, title=pos_title)
        res = pca.PerformPCA(
            collection=Cluster.get_clusters_centriod(compound_group=cg)).tostring()
        pca.data.new_file()
        pca.data.write(res)
        pca.data.close()
        pca.save()
        return HttpResponseRedirect('/space_explorer/')

    # Calculate The Similarity Matrix
    if calc_method == "FP":
        # Calculate Finger Prints
        print("\n Calculating Simmatrix (FingerPrints) ... \n ")
        sm = SimilarityMatrix(compound_group=cg, method=calc_method)
        sm.distance_matrix(0.0)
    elif calc_method == "eden":
        print("\n Calculating Simmatrix (EDeN) ...\n ")
        sm = SimilarityMatrix(compound_group=cg, method=calc_method)
        sm.distance_matrix_eden()
    elif calc_method == "pre_cluster_eden":
        Cluster.calculate_clusters_eden(cg)
        sm = SimilarityMatrix(compound_group=cg, method=calc_method)
        sm.distance_matrix_cluster_centriods()
    else:
        raise ValueError("Unknown calculation method: %r" % (calc_method,))

    # Calculate MDS
    print("\n Calculating MDS ... \n ")
    res = MDSRes(title=pos_title, simmatrix=sm, max_iter=300, eps=1e-6)
    data = numpy.fromstring(sm.data.read())
    # math.sqrt returns a float, but reshape needs integer dimensions.
    d = int(round(math.sqrt(len(data))))
    datamd = numpy.reshape(data, (d, d))
    res.data.new_file()
    res.data.write(res.runMDS(simmatrix=datamd).tostring())
    res.data.close()
    res.save()

    return HttpResponseRedirect('/space_explorer/')
def simialrity_matrix(self):
    """Evaluate ``EdenUtil.simialrity_matrix`` for ``self.nodes``.

    NOTE(review): the outcome is dropped and the method returns ``None``;
    verify whether the matrix was meant to be returned or stored.
    """
    similarity_of = EdenUtil.simialrity_matrix
    similarity_of(self.nodes)