def update(request):
    df_label = pd.read_csv(os.path.join(settings.BASE_DIR, 'data/outcome_labels.csv'))
    print("df_label", '\n', df_label)
    df_data = pd.read_csv(os.path.join(settings.BASE_DIR, 'data/features_rep.csv'))

    # Get the unique row ids referenced by the labelled pairs.
    rowIDLIst = pd.concat([df_label.id1, df_label.id2], axis=0).unique().tolist()
    print("rowIDLIst", '\n', rowIDLIst)

    # Connectivity graph: one symmetric entry per labelled pair (id1, id2, label).
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])
    for lbl in df_label.values:
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])
    print("cmatrix", '\n', cmatrix)

    # Collect the feature rows that belong to the labelled ids.
    trainedData = []
    for rid in rowIDLIst:
        trainedData.append(df_data.iloc[[rid]])
    trainedData = pd.concat(trainedData, axis=0).values
    print("trainedData", '\n', trainedData)

    # Learn the metric on the labelled subset, then transform the full feature set.
    metric = SDML().fit(trainedData, cmatrix)
    newData = metric.transform(df_data)

    al_selection = request.session['clustering']
    num_clustering = request.session['num_cluster']
    clusteringAndTSNE(newData, al_selection, num_clustering)

    # Context passed to the visualisation template.
    content = {'Title': "Step 7: Clustering Visualization", "listId": "li7"}
    return render(request, 'clustering/stp7-clu-visualisation.html', content)
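This view would normally be reachable through the app's URL configuration. The sketch below only illustrates that wiring and is not code from this project; the module path and route name are hypothetical.

# urls.py -- hypothetical route for the update view above
from django.urls import path
from . import views

urlpatterns = [
    path('clustering/update/', views.update, name='clustering-update'),
]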
def update():
    df_label = pd.read_csv('data/outcome_labels.csv')
    print("df_label", '\n', df_label)
    df_data = pd.read_csv('data/features_rep.csv')
    print("df_data", '\n', df_data)

    # Get the unique row ids referenced by the labelled pairs.
    rowIDLIst = pd.concat([df_label.id1, df_label.id2], axis=0).unique().tolist()
    print("rowIDLIst", '\n', rowIDLIst)

    # Connectivity graph: one symmetric entry per labelled pair (id1, id2, label).
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])
    for lbl in df_label.values:
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])
    print("cmatrixShape", '\n', cmatrix.shape)

    # Collect the feature rows that belong to the labelled ids.
    trainedData = []
    for rid in rowIDLIst:
        trainedData.append(df_data.iloc[[rid]])
    trainedData = pd.concat(trainedData, axis=0).values
    print("trainedData.shape", '\n', trainedData.shape)

    # Learn the metric on the labelled subset, then transform the full feature set.
    metric = SDML().fit(trainedData, cmatrix)
    newData = metric.transform(df_data)
    clusteringAndTSNE(newData)
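Both versions of update() build the SDML connectivity graph the same way: outcome_labels.csv holds pairs of row ids (id1, id2) plus a third column read positionally as the pair label, and each labelled pair becomes a symmetric entry in cmatrix. The following is a minimal, self-contained sketch of that step; the column name 'label' and the toy values are assumptions for illustration only.

import numpy as np
import pandas as pd

# Toy stand-in for data/outcome_labels.csv: two labelled pairs of row ids.
# The 'label' column name is hypothetical; the real file only needs a third
# column that is read positionally (lbl[2]) as the pair label.
df_label = pd.DataFrame({'id1': [0, 2], 'id2': [5, 7], 'label': [1, -1]})

# Unique row ids appearing in either column, in order of first appearance.
row_ids = pd.concat([df_label.id1, df_label.id2], axis=0).unique().tolist()

# Symmetric connectivity matrix: entry (i, j) holds the label of pair (i, j).
cmatrix = np.zeros((len(row_ids), len(row_ids)))
for lbl in df_label.values:
    i, j = row_ids.index(lbl[0]), row_ids.index(lbl[1])
    cmatrix[i, j] = cmatrix[j, i] = int(lbl[2])

print(row_ids)   # [0, 2, 5, 7]
print(cmatrix)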
def test_iris(self):
    num_constraints = 1500
    n = self.iris_points.shape[0]
    np.random.seed(1234)
    W = SDML.prepare_constraints(self.iris_labels, n, num_constraints)
    # Test sparse graph inputs.
    for graph in (W, scipy.sparse.csr_matrix(W)):
        sdml = SDML().fit(self.iris_points, graph)
        csep = class_separation(sdml.transform(), self.iris_labels)
        self.assertLess(csep, 0.25)
def test_iris(self):
    num_constraints = 1500
    n = self.iris_points.shape[0]
    # Note: this is a flaky test, which fails for certain seeds.
    # TODO: un-flake it!
    np.random.seed(5555)
    W = SDML.prepare_constraints(self.iris_labels, n, num_constraints)
    # Test sparse graph inputs.
    for graph in (W, scipy.sparse.csr_matrix(W)):
        sdml = SDML().fit(self.iris_points, graph)
        csep = class_separation(sdml.transform(), self.iris_labels)
        self.assertLess(csep, 0.25)
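The two tests differ only in the random seed (the second notes that the test is flaky for some seeds). A rough standalone equivalent, assuming the older metric-learn API used here (SDML.prepare_constraints and an argument-less transform(); recent metric-learn releases expose SDML_Supervised instead) and scikit-learn's bundled iris data, might look like this; the class_separation helper from the test class is omitted, so only shapes are printed.

import numpy as np
import scipy.sparse
from sklearn.datasets import load_iris
from metric_learn import SDML  # older metric-learn API assumed

iris = load_iris()
points, labels = iris.data, iris.target

np.random.seed(1234)
# Build a +1/-1 constraint graph from the class labels (old API, as in the tests).
W = SDML.prepare_constraints(labels, points.shape[0], 1500)

# Fit on both dense and sparse versions of the same graph.
for graph in (W, scipy.sparse.csr_matrix(W)):
    transformed = SDML().fit(points, graph).transform()
    print(transformed.shape)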
def metricLearning(data):
    df_label = pd.read_csv('../TestAndLearn/data/outcome_labels.csv')

    # Get the unique row ids referenced by the labelled pairs.
    rowIDLIst = pd.concat([df_label.id1, df_label.id2], axis=0).unique().tolist()
    print("rowIDLIst", '\n', rowIDLIst)

    # Connectivity graph: one symmetric entry per labelled pair (id1, id2, label).
    cmatrix = np.zeros([len(rowIDLIst), len(rowIDLIst)])
    for lbl in df_label.values:
        cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2])
        cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2])
    print("cmatrix.shape", '\n', cmatrix.shape)

    # Collect the feature rows that belong to the labelled ids.
    trainedData = []
    for rid in rowIDLIst:
        trainedData.append(data.iloc[[rid]])
    trainedData = pd.concat(trainedData, axis=0).values
    print("trainedData.shape", "\n", trainedData.shape)

    # Learn the metric on the labelled subset, then transform the full feature set.
    metric = SDML().fit(trainedData, cmatrix)
    newData = metric.transform(data)
    return newData
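A typical call site for metricLearning(), mirroring how the update() functions above use the transformed features, could look like the following. The CSV path and the downstream clusteringAndTSNE call are taken from the earlier snippets, so treat them as assumptions about this particular project layout.

import pandas as pd

# Load the feature representation, learn an SDML metric over it, then hand the
# transformed features to the clustering/t-SNE step (path assumed from above).
df_data = pd.read_csv('data/features_rep.csv')
newData = metricLearning(df_data)
clusteringAndTSNE(newData)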
#print ("lbl[0]",lbl[0]) #print ("lbl[1]",lbl[1]) #print ("lbl[2]",lbl[2]) #print ("rowIDLIst.index(lbl[0])", rowIDLIst.index(lbl[0]),"rowIDLIst.index(lbl[1])",rowIDLIst.index(lbl[1])) cmatrix[rowIDLIst.index(lbl[0])][rowIDLIst.index(lbl[1])] = int(lbl[2]) cmatrix[rowIDLIst.index(lbl[1])][rowIDLIst.index(lbl[0])] = int(lbl[2]) print "cmatrix.shape", '\n', cmatrix.shape trainedData = [] for rid in rowIDLIst: row = df_reperent.iloc[[rid]] #print "row","\n",row #print "rowType","\n",type(row) trainedData.append(row) #print "LentrainedData","\n", len(trainedData) #print "typetrainedData1", '\n', len(trainedData) trainedData = pd.concat(trainedData, axis=0).as_matrix() print "trainedData.shape", "\n", trainedData.shape #print "trainedData2", "\n", trainedData metric = SDML().fit(trainedData, cmatrix) newData = metric.transform(df_reperent) print type(newData) print newData.shape