def getPereiraDataAndLabels():
    """Load the expression table and derive a binary label vector.

    Reads 'data_RNA_Seq_expression_median_modified.txt' (transposed after
    loading) and 'data_clinical_sample.txt' (first four rows dropped) via
    ``fileReading.readFile``. Column 0 of each table, skipping the first
    remaining row, is used as the list of patient IDs. For every clinical
    patient ID that also occurs among the expression IDs, the value in
    column 5 of the clinical table is collected. Collected values equal to
    'Positive' are encoded as 1, everything else stays 0.

    Returns:
        tuple: ``(data, nLabels)`` where ``data`` is the transposed
        expression table without its first row and first column, converted
        to ``float``, and ``nLabels`` is an integer vector with one entry
        per row of ``data``.
    """
    data = fileReading.readFile(r'data_RNA_Seq_expression_median_modified.txt')
    data = data.transpose()

    labels = fileReading.readFile(r'data_clinical_sample.txt')
    labels = labels[4:, :]

    patientIDInLabels = labels[1:, 0]
    patientIDInExpression = data[1:, 0]

    # NOTE(review): `ind` is a position inside `patientIDInExpression`, yet it
    # is used below as a row index into `labels`, and the collected values are
    # later written to the first len(finalLabels) slots of `nLabels` in
    # clinical-file order. Whether this lines up with the rows of `data`
    # depends on the two files sharing the same ordering -- TODO confirm.
    finalLabels = []
    for patientID in patientIDInLabels:
        ind = np.argwhere(patientIDInExpression == patientID)
        if ind.size != 0:
            val = labels[ind[0], 5].tolist()
            if val:
                finalLabels.append(val[0])

    # Strip the ID row/column before converting the values to float.
    data = np.array(data[1:, 1:], dtype=float)

    # `np.int` was removed from NumPy (deprecated 1.20, removed 1.24); the
    # builtin `int` is the documented drop-in replacement.
    nLabels = np.zeros((data.shape[0], ), dtype=int)
    for idx, label in enumerate(finalLabels):
        if label == 'Positive':
            nLabels[idx] = 1
    return data, nLabels
# --- Disabled: matching clinical labels to expression rows ---
# labels = labels[4:, :]
# patientIDInLabels = labels[1:, 0]
# patientIDInExpression = data[1:, 0]
# finalLabels = []
# for patientID in patientIDInLabels:
#     ind = np.argwhere(patientIDInExpression == patientID)
#     finalLabels.append(labels[ind, 5])

# --- Disabled: online k-means run on 'Skin.txt' ---
# data = fileReading.readFile('Skin.txt')
# data = np.array(data)
# data = data.astype(float)
# C = onlineKmeans(data, 20)

# Active pipeline: load the file, drop its last column and convert the
# remaining values to float.
data = np.array(
    fileReading.readFile('breast-cancer-wisconsin.data')[:, :-1]
).astype(float)

# --- Disabled: linear SVC fit on a train/test split ---
# X_train, X_test, y_train, y_test = train_test_split(data, data, test_size=0.3, random_state=109)
# clf = svm.SVC(kernel='linear')
# clf.fit(X_train, y_train)

# NOTE: this module-level name shadows the builtin `list`; it is kept
# unchanged because other code may read it under this name.
list = cluster(data)

# --- Disabled: sweep over k for online k-means ---
# list = []
# for k in range(10, 50, 10):
#     C = onlineKmeans(data, k)
#     print(k)
#     list.append(len(C))
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 6 10:35:43 2018

@author: Jubair
"""
import numpy as np
import fileReading
from sklearn.cluster import KMeans

# Number of clusters requested from KMeans.
k = 5

# Column 0 is kept aside as `labels`; the remaining columns become the
# float feature matrix that is clustered.
data = fileReading.readFile('lung-cancer.data')
labels, data = data[:, 0], np.array(data[:, 1:]).astype(float)

# Fit with a fixed random_state and record the resulting inertia.
kmeans = KMeans(n_clusters=k, random_state=5)
kmeans.fit(data)
cost_offline = kmeans.inertia_
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 28 12:34:38 2018

@author: atifu
"""
"implementation of dcj-rna by Badr et al."

import numpy as np
from fileReading import readFile
from allignComponents import allignComponents
from result import result

# Load both structures; their lengths are recorded before transposing.
structure_1 = readFile('Cyano-1.txt')
structure_2 = readFile('Flavo-1.txt')
no_inter_first, no_inter_second = len(structure_1), len(structure_2)

structure_1 = structure_1.transpose()
structure_2 = structure_2.transpose()

# After the transpose, rows 0, 1 and 2 are pulled out as separate one-row
# slices (named opening bracket, closing bracket and length).
opening_bracket_fs, closing_bracket_fs, length_first = (
    structure_1[row:row + 1, ] for row in range(3)
)
opening_bracket_ss, closing_bracket_ss, length_second = (
    structure_2[row:row + 1, ] for row in range(3)
)

# NOTE(review): the meaning of `wp` is not visible in this part of the file.
wp = 1
def dataprocess():
    """Return 'breast-cancer-wisconsin.data' as a float array.

    The file is loaded through ``fileReading.readFile``, its last column is
    dropped, and the remaining values are converted to ``float``.
    """
    table = fileReading.readFile('breast-cancer-wisconsin.data')
    return np.array(table[:, :-1]).astype(float)