def _write_single_column_csv(path, header, values):
    """Write `values` one per line under `header` into a CSV at `path`."""
    with open(path, "w") as f:
        f.write(header + "\n")
        for v in values:
            f.write(str(v) + "\n")


def _write_heatmap_csv(path, feature_names, label_names, contrib_mat):
    """Write the long-format (feature,label,contribution) CSV at `path`."""
    with open(path, "w") as f:
        f.write("feature,label,contribution\n")
        for i, feature in enumerate(feature_names):
            for j, label in enumerate(label_names):
                f.write(str(feature) + "," + str(label) + "," +
                        str(contrib_mat[i, j]) + "\n")


def _label_char(value):
    """Map an integer class label to a display character.

    -1 (noise / unassigned) maps to "Z"; otherwise 0 -> "A", 1 -> "B", ...
    """
    return "Z" if value == -1 else chr(65 + value)


def getHeatmap(dataset_name):
    """Compute one-vs-rest cCPCA feature contributions for `dataset_name`
    and dump them as labels/features/heatmap CSV files for the front end.

    Datasets whose name contains "_updated" are read from ../data/ (their
    first column is treated as an id and dropped); all others come from
    ./sample_data/. The last three columns are metadata; column -3 holds
    the integer class label.
    """
    print(dataset_name)
    base = "../data/" if "_updated" in dataset_name else "./sample_data/"
    # Pass paths directly: the original wrapped the data file in
    # open(..., "rb") and never closed it (file-handle leak).
    data = np.loadtxt(base + str(dataset_name) + ".csv",
                      delimiter=",", skiprows=1)
    feature_names = np.genfromtxt(base + dataset_name + ".featurenames.csv",
                                  delimiter=",", dtype='str', skip_header=1)

    if "_updated" in dataset_name:
        X = data[:, 1:-3]  # drop the leading id column
    else:
        X = data[:, :-3]
    y = np.int_(data[:, -3])
    unique_labels = np.unique(y)
    print(X.shape)
    _, n_feats = X.shape
    n_labels = len(unique_labels)
    first_cpc_mat = np.zeros((n_feats, n_labels))
    feat_contrib_mat = np.zeros((n_feats, n_labels))

    # 1. get the scaled feature contributions and first cPC for each label
    #    (one-vs-rest contrastive PCA per class)
    ccpca = CCPCA(n_components=1)
    for i, target_label in enumerate(unique_labels):
        ccpca.fit(X[y == target_label],
                  X[y != target_label],
                  var_thres_ratio=0.2,
                  n_alphas=80,
                  max_log_alpha=0.2)
        first_cpc_mat[:, i] = ccpca.get_first_component()
        feat_contrib_mat[:, i] = ccpca.get_scaled_feat_contribs()

    if dataset_name == "mnist_updated" or dataset_name == "fashion_mnist_updated":
        # MNIST-style datasets: no reordering/aggregation; write straight out
        # to the hard-coded desktop directory (kept from the original).
        # NOTE(review): `unique_labels[label]` indexes by the label VALUE —
        # only correct while labels are exactly 0..n_labels-1; verify.
        xlabel_names = [_label_char(unique_labels[label])
                        for label in unique_labels]
        out_dir = "/home/user/Desktop/heatmap/data/"
        _write_single_column_csv(out_dir + dataset_name + "_labels.csv",
                                 "label", xlabel_names)
        print(xlabel_names)
        ylabel_names = feature_names.tolist()
        _write_single_column_csv(out_dir + dataset_name + "_features.csv",
                                 "feature", ylabel_names)
        # BUG FIX: the original never closed the heatmap file in this
        # branch; the `with` inside the helper guarantees flush + close.
        _write_heatmap_csv(out_dir + dataset_name + "_heatmap.csv",
                           ylabel_names, xlabel_names, feat_contrib_mat)
    else:
        # 2. apply optimal sign flipping
        OptSignFlip().opt_sign_flip(first_cpc_mat, feat_contrib_mat)
        # 3. apply hierarchical clustering with optimal-leaf-ordering
        mr = MatReorder()
        feat_contrib_mat = mr.fit_transform(feat_contrib_mat)
        # 4. apply aggregation (currently keeps every feature, so this is
        #    effectively a pass-through; results unused downstream)
        n_feats_shown = n_feats
        agg_feat_contrib_mat, label_to_rows, label_to_rep_row = mr.aggregate_rows(
            feat_contrib_mat, n_feats_shown, agg_method='abs_max')
        # cluster (column) names, in the reordered column order
        xlabel_names = [_label_char(unique_labels[col])
                        for col in mr.order_col_]
        _write_single_column_csv("../data/" + dataset_name + "_labels.csv",
                                 "label", xlabel_names)
        # feature (row) names, in the reordered row order
        ylabel_names = np.array(feature_names)[mr.order_row_].tolist()
        _write_single_column_csv("../data/" + dataset_name + "_features.csv",
                                 "feature", ylabel_names)
        _write_heatmap_csv("../data/" + dataset_name + "_heatmap.csv",
                           ylabel_names, xlabel_names, feat_contrib_mat)
# Demo: visualize cPCA vs ccPCA projections of the Iris data,
# one scatter color per class.
colors = ['navy', 'turquoise', 'darkorange']
lw = 2


def _scatter_by_class(projection, title):
    """Scatter-plot a 2-D `projection`, one color per class 0/1/2."""
    plt.figure()
    for cls, col in enumerate(colors):
        mask = y == cls
        plt.scatter(projection[mask, 0], projection[mask, 1],
                    color=col, alpha=.8, lw=lw, label=cls)
    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.title(title)
    plt.show()


# cPCA projection computed earlier (X_r) at a fixed alpha.
_scatter_by_class(X_r, 'cPCA of IRIS dataset (alpha=2.15)')

# ccPCA with automatic alpha selection, contrasting class 0 vs the rest.
ccpca = CCPCA()
ccpca.fit(X[y == 0], X[y != 0], var_thres_ratio=0.5, max_log_alpha=0.5)
X_r2 = ccpca.transform(X)
_scatter_by_class(
    X_r2, 'ccPCA of IRIS dataset (alpha =' + str(ccpca.get_best_alpha()) + ')')
def wordCloudGen():
    """Flask endpoint: compute cCPCA feature contributions for the dataset
    named by the `datasetName` query parameter, write them to
    ../data/featContrib.csv, and return an empty CORS-enabled response.
    """
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    from ccpca import CCPCA
    from opt_sign_flip import OptSignFlip
    from mat_reorder import MatReorder

    dataset_name = request.args.get("datasetName")
    print(dataset_name)

    base = "../data/" if "_updated" in dataset_name else "./sample_data/"
    # Pass the path directly: the original wrapped it in open(..., "rb")
    # and never closed it (file-handle leak).
    data = np.loadtxt(base + str(dataset_name) + ".csv",
                      delimiter=",", skiprows=1)
    feature_names = np.genfromtxt(base + dataset_name + ".featurenames.csv",
                                  delimiter=",", dtype='str', skip_header=1)
    print(feature_names)

    if "_updated" in dataset_name:
        X = data[:, 1:-3]  # drop the leading id column
    else:
        X = data[:, :-3]
    y = np.int_(data[:, -3])
    unique_labels = np.unique(y)

    # 2-component cCPCA contrasting class 0 against the rest; only the
    # per-feature contributions are used below. (The original also stored
    # the transform result and best alpha in unused locals — removed.)
    target_label = 0
    ccpca = CCPCA(n_components=2)
    ccpca.fit(X[y == target_label], X[y != target_label],
              var_thres_ratio=0.5, n_alphas=40, max_log_alpha=0.5)
    cpca_fcs = ccpca.get_feat_contribs()

    # NOTE(review): from here on X is rebuilt WITHOUT dropping the id
    # column, even for "_updated" datasets — looks unintentional but is
    # kept for behavioral compatibility; verify against the data layout.
    X = data[:, :-3]
    y = np.int_(data[:, -3])
    unique_labels = np.unique(y)
    _, n_feats = X.shape
    n_labels = len(unique_labels)
    first_cpc_mat = np.zeros((n_feats, n_labels))
    feat_contrib_mat = np.zeros((n_feats, n_labels))

    # One-vs-rest first cPC and scaled contributions per class.
    ccpca = CCPCA(n_components=1)
    for i, target_label in enumerate(unique_labels):
        ccpca.fit(X[y == target_label], X[y != target_label],
                  var_thres_ratio=0.5, n_alphas=40, max_log_alpha=0.5)
        first_cpc_mat[:, i] = ccpca.get_first_component()
        feat_contrib_mat[:, i] = ccpca.get_scaled_feat_contribs()
    OptSignFlip().opt_sign_flip(first_cpc_mat, feat_contrib_mat)
    mr = MatReorder()
    mr.fit_transform(feat_contrib_mat)  # result unused; kept for parity

    print(feature_names)
    combined = np.vstack((feature_names, cpca_fcs)).T
    print(combined)
    pd.DataFrame(combined).to_csv("../data/featContrib.csv")

    resp = make_response()
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp