Beispiel #1
0
# --- Plot 1: scatter of the first two columns of X_r, one colour per class.
# X_r and y are defined earlier in the file (outside this excerpt); per the
# plot title, X_r is presumably a cPCA projection of the IRIS data.
plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2

# The class id doubles as the legend label (0, 1, 2).
for class_id, class_color in enumerate(colors):
    in_class = y == class_id
    plt.scatter(X_r[in_class, 0],
                X_r[in_class, 1],
                color=class_color,
                alpha=.8,
                lw=lw,
                label=class_id)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('cPCA of IRIS dataset (alpha=2.15)')
plt.show()

# --- Plot 2: ccPCA with class 0 as the target group and every other row as
# the background group, then project the full data set.
ccpca = CCPCA()
target_rows = X[y == 0]
background_rows = X[y != 0]
ccpca.fit(target_rows,
          background_rows,
          var_thres_ratio=0.5,
          max_log_alpha=0.5)
X_r2 = ccpca.transform(X)

plt.figure()
for class_id, class_color in enumerate(colors):
    in_class = y == class_id
    plt.scatter(X_r2[in_class, 0],
                X_r2[in_class, 1],
                color=class_color,
                alpha=.8,
                lw=lw,
                label=class_id)
plt.legend(loc='best', shadow=False, scatterpoints=1)
# The title reports the alpha value chosen by the fit.
ccpca_title = 'ccPCA of IRIS dataset (alpha =' + str(
    ccpca.get_best_alpha()) + ')'
plt.title(ccpca_title)
plt.show()
def wordCloudGen():
    """Export ccPCA feature contributions for the requested dataset.

    Reads the ``datasetName`` query parameter from the current request and
    loads ``<datasetName>.csv`` and ``<datasetName>.featurenames.csv`` from
    ``../data/`` when the name contains ``"_updated"``, otherwise from
    ``./sample_data/``. It then fits ccPCA with label 0 as the target group
    against all remaining rows and writes the feature names paired with
    their contributions to ``../data/featContrib.csv``.

    Returns:
        An empty response carrying ``Access-Control-Allow-Origin: *`` on
        success; a 400 response when ``datasetName`` is missing or contains
        a ``..`` path component; a 404 response when the dataset files
        cannot be opened.
    """
    import numpy as np
    import pandas as pd
    from ccpca import CCPCA

    dataset_name = request.args.get("datasetName")
    print(dataset_name)
    # The name comes straight from the client and is spliced into a file
    # path, so refuse anything that could escape the data directories.
    if not _is_safe_dataset_name(dataset_name):
        return _cors_response("invalid datasetName", 400)

    is_updated = "_updated" in dataset_name
    base_dir = "../data/" if is_updated else "./sample_data/"
    try:
        # Passing a path (not an open file object) lets NumPy close the
        # file itself, so no handle is leaked.
        data = np.loadtxt(base_dir + dataset_name + ".csv",
                          delimiter=",",
                          skiprows=1)
        feature_names = np.genfromtxt(base_dir + dataset_name +
                                      ".featurenames.csv",
                                      delimiter=",",
                                      dtype='str',
                                      skip_header=1)
    except OSError:
        return _cors_response("dataset not found", 404)
    print(feature_names)

    # Column layout used here: the last three columns are excluded from the
    # features and the third-from-last holds the class label. "_updated"
    # files additionally skip their first column (presumably a row index
    # written on export -- TODO confirm).
    first_feature_col = 1 if is_updated else 0
    X = data[:, first_feature_col:-3]
    y = np.int_(data[:, -3])

    target_label = 0
    ccpca = CCPCA(n_components=2)
    ccpca.fit(X[y == target_label],
              X[y != target_label],
              var_thres_ratio=0.5,
              n_alphas=40,
              max_log_alpha=0.5)
    cpca_fcs = ccpca.get_feat_contribs()

    # One output row per feature: (name, contribution). Stacking with the
    # string array of names makes the contributions strings as well.
    combined = np.vstack((feature_names, cpca_fcs)).T
    print(combined)
    pd.DataFrame(combined).to_csv("../data/featContrib.csv")
    return _cors_response()


def _is_safe_dataset_name(name):
    """Return True if *name* is non-empty and has no ``..`` path component."""
    if not name or "\0" in name:
        return False
    return ".." not in name.replace("\\", "/").split("/")


def _cors_response(*args):
    """Build a response from *args* and allow cross-origin access to it.

    Assumes ``make_response`` is Flask's, which accepts an optional body and
    status code.
    """
    resp = make_response(*args)
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp