# Assumed imports/aliases for this file (the original import block is not shown
# in this section; RandomProjection is taken to be sklearn's GaussianRandomProjection):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import KMeans as KM
from sklearn.decomposition import PCA, FastICA as ICA
from sklearn.feature_selection import SelectKBest as best
from sklearn.preprocessing import StandardScaler
from sklearn.random_projection import GaussianRandomProjection as RandomProjection


def randproj(tx, ty, rx, ry):
    # Random projection at the original feature count, then run the neural net.
    compressor = RandomProjection(tx[1].size)
    newtx = compressor.fit_transform(tx)
    newrx = compressor.transform(rx)  # use the projection fitted on the training data
    # em(newtx, ty, newrx, ry, add="wRPtr", times=10)
    # km(newtx, ty, newrx, ry, add="wRPtr", times=10)
    nn(newtx, ty, newrx, ry, add="wRP")
def caller(tx, ty, rx, ry):
    # Sweep several component counts for each dimensionality-reduction
    # technique and feed the reduced data to the neural network.
    nums = [4, 8, 12, 16]
    for n in nums:
        print("PCA")
        print(n)
        compressor = PCA(n_components=n)
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        newrx = compressor.transform(rx)
        nnTable(newtx, ty, newrx, ry, alg="PCA")
    for n in nums:
        print("ICA")
        print(n)
        compressor = ICA(n_components=n)
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        newrx = compressor.transform(rx)
        nnTable(newtx, ty, newrx, ry, alg="ICA")
    for n in nums:
        print("RandProj")
        print(n)
        compressor = RandomProjection(n)
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        newrx = compressor.transform(rx)
        nnTable(newtx, ty, newrx, ry, alg="RandProj")
    for n in nums:
        print("kbest")
        print(n)
        compressor = best(k=n)
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        newrx = compressor.transform(rx)
        nnTable(newtx, ty, newrx, ry, alg="KBest")
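# nnTable is called above but is not defined in this section. A minimal sketch,
# assuming it trains a small feed-forward network on the reduced features and
# prints train/test accuracy for the named algorithm; the layer sizes and
# iteration count below are placeholders, not the original settings:
from sklearn.neural_network import MLPClassifier


def nnTable(tx, ty, rx, ry, alg=""):
    clf = MLPClassifier(hidden_layer_sizes=(30,), max_iter=500, random_state=0)
    clf.fit(tx, ty.ravel())
    print(alg, "train accuracy:", clf.score(tx, ty.ravel()),
          "test accuracy:", clf.score(rx, ry.ravel()))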
def graphCallerNN(tx, ty, rx, ry):
    # Reduce to half the original feature count, add EM cluster information,
    # then run the neural network for each DR technique.
    n = tx[1].size // 2

    compressor = PCA(n_components=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    myNN(newtx, ty, newrx, ry, "EM-PCA")
    # nnTable(newtx, ty, newrx, ry, alg="EM-PCA")

    compressor = ICA(n_components=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-ICA")
    myNN(newtx, ty, newrx, ry, "EM-ICA")

    compressor = RandomProjection(n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-RP")
    myNN(newtx, ty, newrx, ry, "EM-RP")

    compressor = best(k=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-KB")
    myNN(newtx, ty, newrx, ry, "EM-KB")
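# graphCallerNN relies on helpers oneem and myNN that are not defined in this
# section. As a hedged illustration of the "EM on top of DR features" step, the
# sketch below shows one way to attach EM cluster information to both the train
# and test features with scikit-learn's GaussianMixture. The name
# add_em_features is hypothetical and is not the original oneem helper.
from sklearn.mixture import GaussianMixture


def add_em_features(tx, rx, n_clusters=4):
    gmm = GaussianMixture(n_components=n_clusters, random_state=0).fit(tx)
    # Posterior probabilities act as soft cluster-membership features,
    # appended to both splits so their dimensions stay consistent.
    return (np.hstack([tx, gmm.predict_proba(tx)]),
            np.hstack([rx, gmm.predict_proba(rx)]))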
def randproj(tx, ty, rx, ry):
    compressor = RandomProjection(tx[1].size)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wRPtr", times=10)
    km(newtx, ty, newrx, ry, add="wRPtr", times=10)
    nn(newtx, ty, newrx, ry, add="wRPtr")
def randproj(tx, ty, rx, ry, dataset):
    # NOTE: redefines randproj again; this variant also plots the projected
    # data in 2-D and 3-D before clustering and running the neural network.
    compressor = RandomProjection(tx[1].size // 2)
    compressor.fit(tx, y=ty)

    # 2-D scatter plot of the randomly projected training data
    rp2 = RandomProjection(2)
    rp2.fit(tx)
    result = pd.DataFrame(rp2.transform(tx), columns=['RP%i' % i for i in range(2)])
    my_color = pd.Series(ty).astype('category').cat.codes
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(result['RP0'], result['RP1'], c=my_color, cmap="Dark2_r", s=60)
    ax.set_xlabel("RP1")
    ax.set_ylabel("RP2")
    ax.set_title("RP on the " + dataset + " data set")
    plt.show()

    # Store a 3-component random projection in a data frame for the 3-D plot
    rp3 = RandomProjection(3)
    rp3.fit(tx)
    result = pd.DataFrame(rp3.transform(tx), columns=['RP%i' % i for i in range(3)])
    my_color = pd.Series(ty).astype('category').cat.codes
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(result['RP0'], result['RP1'], result['RP2'], c=my_color, cmap="Dark2_r", s=60)
    xAxisLine = ((min(result['RP0']), max(result['RP0'])), (0, 0), (0, 0))
    ax.plot(xAxisLine[0], xAxisLine[1], xAxisLine[2], 'r')
    yAxisLine = ((0, 0), (min(result['RP1']), max(result['RP1'])), (0, 0))
    ax.plot(yAxisLine[0], yAxisLine[1], yAxisLine[2], 'r')
    zAxisLine = ((0, 0), (0, 0), (min(result['RP2']), max(result['RP2'])))
    ax.plot(zAxisLine[0], zAxisLine[1], zAxisLine[2], 'r')
    ax.set_xlabel("RP1")
    ax.set_ylabel("RP2")
    ax.set_zlabel("RP3")
    ax.set_title("RP on the " + dataset + " data set")
    plt.show()

    # Cluster and train the network on the reduced data
    reduced_data = RandomProjection(2).fit_transform(tx)
    em(tx, ty, rx, ry, reduced_data, add="", times=4, dataset=dataset, alg="RP")
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, [], add="", times=4, dataset=dataset, alg="RP")
    em(newtx, ty, newrx, ry, RandomProjection(n_components=2).fit_transform(tx),
       add="wRPtr", times=9, dataset=dataset, alg="RandProj")
    # nn(newtx, ty, newrx, ry, add="wRPtr")
    myNN(newtx, ty, newrx, ry, "RandProj")
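# Example invocation (hypothetical: the original data-loading code is not part
# of this section; "car.csv", the 70/30 split, and the assumption that the
# features are already numeric are all placeholders):
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    cardata = pd.read_csv("car.csv").values
    tx, rx, ty, ry = train_test_split(cardata[:, :-1], cardata[:, -1],
                                      test_size=0.3, random_state=0)
    randproj(tx, ty, rx, ry, dataset="Car")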
# Scale the data
scaler = StandardScaler()
scaler.fit(X)
X_train_std = scaler.transform(X)
X_test_std = scaler.transform(X)
X_toTransform = X_train_std
y_input = y

######
# Run initial Projection Analysis with 1:n components
######
reconstructionErrors = []
maxComponents = 46
minComponents = 1
for i in range(minComponents, maxComponents):
    projection = RandomProjection(n_components=i)
    projection.fit(X_toTransform)
    error = reconstructionError(projection, X_toTransform)
    reconstructionErrors.append(error)
    # print diagnostics
    # print('Components \n', projection.components_)
    print('Number of Components ', projection.n_components_)
    print('Reconstruction Error ', error)

print(reconstructionErrors)
print('Min reconstruction error: %f' % np.min(reconstructionErrors),
      'with', np.argmin(reconstructionErrors) + minComponents, 'components')

# Save reconstruction error plot
with plt.style.context('seaborn-whitegrid'):
    # Minimal completion (assumed): plot error vs. number of components and save.
    plt.figure()
    plt.plot(range(minComponents, maxComponents), reconstructionErrors)
    plt.xlabel('Number of components')
    plt.ylabel('Reconstruction error')
    plt.title('Random Projection reconstruction error')
    plt.savefig('RP_reconstruction_error.png', dpi=300)
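# reconstructionError is used above but not defined in this section. A common
# way to measure it for a random projection is to map the projected data back
# through the pseudo-inverse of the projection matrix and take the mean squared
# difference from the original data. A sketch under that assumption (in a
# runnable script it would need to be available before the loop above):
import scipy.sparse as sps


def reconstructionError(projection, X):
    W = projection.components_                 # shape (n_components, n_features)
    if sps.issparse(W):
        W = np.asarray(W.todense())
    p = np.linalg.pinv(W)                      # shape (n_features, n_components)
    reconstructed = ((p @ W) @ X.T).T          # project down, then back up
    return np.nanmean(np.square(X - reconstructed))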
def kmtable(tx, ty, rx, ry, dataset=""): processed = [] adj_rand = [] v_meas = [] mutual_info = [] adj_mutual_info = [] sil = [] inertia = [] compressor = PCA(n_components = tx[1].size/2) compressor.fit(tx, y=ty) pcatx = compressor.transform(tx) pcarx = compressor.transform(rx) p = [] compressor = ICA(n_components = tx[1].size/2) compressor.fit(tx, y=ty) icatx = compressor.transform(tx) icarx = compressor.transform(rx) ic = [] compressor = RandomProjection(tx[1].size/2) compressor.fit(tx, y=ty) rptx = compressor.transform(tx) rprx = compressor.transform(rx) r = [] compressor = best(k=tx[1].size/2) compressor.fit(tx, y=ty) kbtx = compressor.transform(tx) kbrx = compressor.transform(rx) k = [] for i in range(2,8): # clusters = {x:[] for x in range(i)} clf = KM(n_clusters=i) clf.fit(pcatx) test = clf.predict(pcatx) result = clf.predict(pcarx) p.append(metrics.v_measure_score(ry.ravel(), result)) clf = KM(n_clusters=i) clf.fit(icatx) test = clf.predict(icatx) result = clf.predict(icarx) ic.append(metrics.v_measure_score(ry.ravel(), result)) clf = KM(n_clusters=i) clf.fit(rptx) test = clf.predict(rptx) result = clf.predict(rprx) r.append(metrics.v_measure_score(ry.ravel(), result)) clf = KM(n_clusters=i) clf.fit(kbtx) test = clf.predict(kbtx) result = clf.predict(kbrx) k.append(metrics.v_measure_score(ry.ravel(), result)) # adj_rand.append(metrics.adjusted_rand_score(ry.ravel(), result)) # v_meas.append(metrics.v_measure_score(ry.ravel(), result)) # mutual_info.append(metrics.fowlkes_mallows_score(ry.ravel(), result)) # adj_mutual_info.append(metrics.homogeneity_score(ry.ravel(), result)) plt.figure() plt.title(dataset+": KM Clustering & DR") plt.xlabel('Number of clusters') plt.ylabel('V Measure Score value') plt.plot(range(2,8), p, label="PCA") plt.plot(range(2,8), ic, label="ICA") plt.plot(range(2,8), r, label = "RP") plt.plot(range(2,8), k, label="KB") plt.legend() plt.ylim(ymin=-0.05, ymax=0.5) plt.savefig("KM_DR_"+dataset+"_VM.png", dpi=300)