Example #1
def dim_reductor(n, X):
    # LLE here is the functional form (sklearn.manifold.locally_linear_embedding),
    # which returns (embedding, reconstruction_error) -- hence the [0].
    reductor = dict(
        PCA=PCA(n_components=n).fit_transform(X),
        KPCA=KernelPCA(n_components=n, kernel='rbf').fit_transform(X),
        ISOMAP=Isomap(n_neighbors=10, n_components=n).fit_transform(X),
        MDS=MDS(n_components=n).fit_transform(X),
        LLE=LLE(X, n_neighbors=10, n_components=n)[0])
    return reductor
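A minimal usage sketch on a toy manifold, assuming the imports the snippet implies (PCA and KernelPCA from sklearn.decomposition; Isomap and MDS from sklearn.manifold; locally_linear_embedding aliased as LLE):

import numpy as np
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap, MDS
from sklearn.manifold import locally_linear_embedding as LLE

X, _ = make_swiss_roll(n_samples=500, random_state=0)
embeddings = dim_reductor(2, X)
for name, emb in embeddings.items():
    print(name, emb.shape)  # each entry is a (500, 2) embedding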
Example #2
def lle(space):

    n_neighbors = int(space['n_neighbors'])
    method = space['method']

    vertices, colors = get_all_vertices_dk_atlas_w_colors()
    print(space)

    lle = LLE(n_neighbors=n_neighbors, n_components=2, method=method, neighbors_algorithm='auto')
    lle_xy = lle.fit_transform(vertices)

    centers = get_centers_of_rois_xy(lle_xy)

    avg_distance = avg_distance_between_center_of_masses(centers)

    model_name = 'lle_{}_{}'.format(method, avg_distance)

    result = {
        'loss': -avg_distance,
        'space': space,
        'status': STATUS_OK
    }

    save_json_result(model_name, result)
    save_2d_roi_map(lle_xy, colors, centers, model_name)

    return result
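This objective follows the hyperopt convention (a dict with 'loss' and STATUS_OK), so it can be handed to fmin. A driving sketch with a hypothetical search space; the ROI helpers above are project-specific:

from hyperopt import fmin, tpe, hp, Trials

space = {
    'n_neighbors': hp.quniform('n_neighbors', 5, 50, 1),
    'method': hp.choice('method', ['standard', 'modified']),
}
best = fmin(fn=lle, space=space, algo=tpe.suggest,
            max_evals=50, trials=Trials())
print(best)  # maximizes avg_distance, since loss = -avg_distance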
Example #3
def dim_reduct_plot(file):
    """
    获取 matteonormb.obj 的所有点,降噪后对其进行绘图
    :param file: obj
    :return:
    """
    mesh = meshio.read(file)
    points = mesh.points

    pca_data = prim_com_analy(points, 2)
    # pca = PCA(n_components=2)
    # X_pca = pca.fit_transform(points)
    mds_data = mult_dim_scaling(points, 2)
    # mds = MDS(n_components=2)
    # X_mds = mds.fit_transform(points)
    lle_data = LLE(n_components=2, n_neighbors=8).fit_transform(points)
    iso_data = ISOMap(n_components=2, n_neighbors=10).fit_transform(points)

    plt.subplot(221)
    plt.title('PCA')
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c='blue', marker='.')
    plt.subplot(222)
    plt.title('MDS')
    plt.scatter(mds_data[:, 0], mds_data[:, 1], c='red', marker='.')
    plt.subplot(223)
    plt.title('LLE')
    plt.scatter(lle_data[:, 0], lle_data[:, 1], c='yellow', marker='.')
    plt.subplot(224)
    plt.title('ISOMAP')
    plt.scatter(iso_data[:, 0], iso_data[:, 1], c='green', marker='.')

    plt.show()
Example #4
def nn_check(ppd):
    for i in range(8, 26):
        lle = LLE(n_components=3,
                  n_neighbors=i,
                  method='modified',
                  modified_tol=1e-12)
        XT = lle.fit_transform(ppd)
        print('running')
        validity(XT, i)
    print('done')
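validity() is not defined in this excerpt; a minimal stand-in that scores each embedding by clustering it and printing a silhouette score might look like this (the cluster count is an assumption):

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

def validity(XT, n_neighbors, n_clusters=8):
    # Hypothetical replacement for the project's validity() helper.
    labels = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit_predict(XT)
    print('n_neighbors={}: silhouette={:.3f}'.format(
        n_neighbors, silhouette_score(XT, labels)))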
Example #5
def draw(reduction_method):
    if reduction_method == "PCA":
        method = PCA(n_components=3)
    elif reduction_method == "LLE":
        method = LLE(n_components=3, n_neighbors=5, eigen_solver="auto")
    elif reduction_method == "Isomap":
        method = Isomap(n_components=3, n_neighbors=5, eigen_solver="auto")
    elif reduction_method == "MDS":
        method = MDS(n_components=3)

    print()
    print(reduction_method + ' is being plotted')

    fitted_method = method.fit_transform(x)
    data_frame_of_method = pd.DataFrame(
        data=fitted_method,
        columns=['component 1', 'component 2', 'component 3'])

    # print(principalDf.head())
    #
    # print(data_frame[['SKC']].head())

    print(int(time.time() - start), 'seconds')

    finalDf = pd.concat([data_frame_of_method, data_frame[['SKC']]], axis=1)
    # print('========================')
    # print(finalDf.head())
    # print('========================')

    fig = plot.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('X', fontsize=14)
    ax.set_ylabel('Y', fontsize=14)
    ax.set_zlabel('Z', fontsize=14)
    ax.set_title('3 Components ' + reduction_method, fontsize=20)

    targets = ['BKN', 'SCT', 'CLR', 'OVC']
    colors = ['r', 'g', 'b', 'k']
    for target, color in zip(targets, colors):
        indices_to_keep = finalDf['SKC'] == target
        ax.scatter(finalDf.loc[indices_to_keep, 'component 1'],
                   finalDf.loc[indices_to_keep, 'component 2'],
                   finalDf.loc[indices_to_keep, 'component 3'],
                   c=color,
                   s=1)
    ax.legend(targets)
    ax.grid(True)
    plot.show()
Example #6
def main():
    # ----- settings:
    dataset = 'MNIST'  # --> 'Facial' or 'MNIST' or 'Breast_cancer'
    embedding_method = 'Isomap'
    n_components = 5
    split_in_cross_validation_again = False
    load_dataset_again = False
    subset_of_MNIST = True
    pick_subset_of_MNIST_again = False
    MNIST_subset_cardinality_training = 10000  # picking from first samples of 60,000 samples
    MNIST_subset_cardinality_testing = 5000  # picking from first samples of 10,000 samples
    # ----- paths:
    if dataset == 'Facial':
        path_dataset = './input/att_database/'
        path_dataset_save = './input/pickle_dataset/Facial/'
    elif dataset == 'MNIST':
        path_dataset = './input/mnist/'
        path_dataset_save = './input/pickle_dataset/MNIST/'
    elif dataset == 'Breast_cancer':
        path_dataset = './input/Breast_cancer_dataset/wdbc_data.txt'
        path_dataset_save = './input/pickle_dataset/Breast_cancer/'
    # ----- Loading dataset:
    print('Reading dataset...')
    if dataset == 'MNIST':
        if load_dataset_again:
            training_data = list(
                read_MNIST_dataset(dataset="training", path=path_dataset))
            testing_data = list(
                read_MNIST_dataset(dataset="testing", path=path_dataset))

            number_of_training_samples = len(training_data)
            dimension_of_data = 28 * 28
            X_train = np.empty((0, dimension_of_data))
            y_train = np.empty((0, 1))
            for sample_index in range(number_of_training_samples):
                if np.mod(sample_index, 1) == 0:
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_training_samples) + ' samples...')
                label, pixels = training_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_train = np.vstack([X_train, pixels_reshaped])
                y_train = np.vstack([y_train, label])
            y_train = y_train.ravel()

            number_of_testing_samples = len(testing_data)
            dimension_of_data = 28 * 28
            X_test = np.empty((0, dimension_of_data))
            y_test = np.empty((0, 1))
            for sample_index in range(number_of_testing_samples):
                if np.mod(sample_index, 1) == 0:
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_testing_samples) + ' samples...')
                label, pixels = testing_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_test = np.vstack([X_test, pixels_reshaped])
                y_test = np.vstack([y_test, label])
            y_test = y_test.ravel()

            save_variable(X_train, 'X_train', path_to_save=path_dataset_save)
            save_variable(y_train, 'y_train', path_to_save=path_dataset_save)
            save_variable(X_test, 'X_test', path_to_save=path_dataset_save)
            save_variable(y_test, 'y_test', path_to_save=path_dataset_save)
        else:
            file = open(path_dataset_save + 'X_train.pckl', 'rb')
            X_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_train.pckl', 'rb')
            y_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'X_test.pckl', 'rb')
            X_test = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_test.pckl', 'rb')
            y_test = pickle.load(file)
            file.close()

        if subset_of_MNIST:
            if pick_subset_of_MNIST_again:
                X_train_picked = X_train[
                    0:MNIST_subset_cardinality_training, :]
                X_test_picked = X_test[0:MNIST_subset_cardinality_testing, :]
                y_train_picked = y_train[0:MNIST_subset_cardinality_training]
                y_test_picked = y_test[0:MNIST_subset_cardinality_testing]
                save_variable(X_train_picked,
                              'X_train_picked',
                              path_to_save=path_dataset_save)
                save_variable(X_test_picked,
                              'X_test_picked',
                              path_to_save=path_dataset_save)
                save_variable(y_train_picked,
                              'y_train_picked',
                              path_to_save=path_dataset_save)
                save_variable(y_test_picked,
                              'y_test_picked',
                              path_to_save=path_dataset_save)
            else:
                file = open(path_dataset_save + 'X_train_picked.pckl', 'rb')
                X_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'X_test_picked.pckl', 'rb')
                X_test_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_train_picked.pckl', 'rb')
                y_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_test_picked.pckl', 'rb')
                y_test_picked = pickle.load(file)
                file.close()
            X_train = X_train_picked
            X_test = X_test_picked
            y_train = y_train_picked
            y_test = y_test_picked
        image_shape = (28, 28)
    elif dataset == 'Facial':
        if load_dataset_again:
            X, y, image_shape = read_image_dataset(dataset_path=path_dataset,
                                                   imagesType='.jpg')
            save_variable(variable=X,
                          name_of_variable='X',
                          path_to_save=path_dataset_save)
            save_variable(variable=y,
                          name_of_variable='y',
                          path_to_save=path_dataset_save)
            save_variable(variable=image_shape,
                          name_of_variable='image_shape',
                          path_to_save=path_dataset_save)
        else:
            file = open(path_dataset_save + 'X.pckl', 'rb')
            X = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y.pckl', 'rb')
            y = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'image_shape.pckl', 'rb')
            image_shape = pickle.load(file)
            file.close()
    elif dataset == 'Breast_cancer':
        # Read the text file into a pandas DataFrame:
        # https://stackoverflow.com/questions/21546739/load-data-from-txt-with-pandas
        data = pd.read_csv(path_dataset, sep=",", header=None)
        labels_of_classes = ['M', 'B']
        X, y = read_BreastCancer_dataset(data=data,
                                         labels_of_classes=labels_of_classes)
        # Cast to float64; otherwise MDS raises an error:
        # https://stackoverflow.com/questions/16990996/multidimensional-scaling-fitting-in-numpy-pandas-and-sklearn-valueerror
        X = X.astype(np.float64)
        # --- cross validation:
        path_to_save = './input/split_data/'
        portion_of_test_in_dataset = 0.3
        number_of_folds = 10
        if split_in_cross_validation_again:
            train_indices_in_folds, test_indices_in_folds, \
            X_train_in_folds, X_test_in_folds, y_train_in_folds, y_test_in_folds = \
                cross_validation(X=X, y=y, n_splits=number_of_folds, test_size=portion_of_test_in_dataset)
            save_variable(train_indices_in_folds,
                          'train_indices_in_folds',
                          path_to_save=path_to_save)
            save_variable(test_indices_in_folds,
                          'test_indices_in_folds',
                          path_to_save=path_to_save)
            save_variable(X_train_in_folds,
                          'X_train_in_folds',
                          path_to_save=path_to_save)
            save_variable(X_test_in_folds,
                          'X_test_in_folds',
                          path_to_save=path_to_save)
            save_variable(y_train_in_folds,
                          'y_train_in_folds',
                          path_to_save=path_to_save)
            save_variable(y_test_in_folds,
                          'y_test_in_folds',
                          path_to_save=path_to_save)
            for fold_index in range(number_of_folds):
                save_np_array_to_txt(np.asarray(
                    train_indices_in_folds[fold_index]),
                                     'train_indices_in_fold' + str(fold_index),
                                     path_to_save=path_to_save)
                save_np_array_to_txt(np.asarray(
                    test_indices_in_folds[fold_index]),
                                     'test_indices_in_folds' + str(fold_index),
                                     path_to_save=path_to_save)
        else:
            file = open(path_to_save + 'train_indices_in_folds.pckl', 'rb')
            train_indices_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'test_indices_in_folds.pckl', 'rb')
            test_indices_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'X_train_in_folds.pckl', 'rb')
            X_train_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'X_test_in_folds.pckl', 'rb')
            X_test_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'y_train_in_folds.pckl', 'rb')
            y_train_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'y_test_in_folds.pckl', 'rb')
            y_test_in_folds = pickle.load(file)
            file.close()

    print(X_train.shape)
    print(X_test.shape)

    # ----- embedding:
    print('Embedding...')
    if dataset == 'MNIST':
        # plot_components(X_projected=X_projected, images=X.reshape((-1, image_shape[0], image_shape[1])), ax=ax, image_scale=0.6, markersize=10, thumb_frac=0.05, cmap='gray_r')

        # ----- embedding:
        if embedding_method == 'LLE':
            clf = LLE(n_neighbors=5,
                      n_components=n_components,
                      method='standard')
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'Isomap':
            clf = Isomap(n_neighbors=5, n_components=n_components)
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'MDS':
            clf = MDS(n_components=n_components)
            X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'PCA':
            clf = PCA(n_components=n_components)
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'KernelPCA':
            clf = KernelPCA(n_components=n_components, kernel='rbf')
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'LaplacianEigenmap':
            clf = LaplacianEigenmap(n_neighbors=5, n_components=n_components)
            X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'LDA':
            clf = LDA(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'SPCA':
            clf = SPCA(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'TSNE':
            clf = TSNE(n_components=min(3, n_components))
            # print(type(list(y_train)))
            X_projected = clf.fit_transform(
                X=np.vstack([X_train, X_test]),
                y=np.asarray(list(y_train) + list(y_test)))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'ML':
            clf = ML(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'Kernel_FLDA':
            clf = Kernel_FLDA(n_components=n_components, kernel='linear')
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'No_embedding':
            X_train_projected = X_train
            X_test_projected = X_test

        # --- classification:
        print('Classification...')
        # clf = KNN(n_neighbors=1)
        clf = NB()
        clf.fit(X=X_train_projected, y=y_train)
        y_pred = clf.predict(X=X_test_projected)
        accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
        error = 1 - accuracy_score(y_true=y_test, y_pred=y_pred)

        # --- saving results:
        save_variable(accuracy, 'accuracy', path_to_save='./output/MNIST/')
        save_np_array_to_txt(np.asarray(accuracy),
                             'accuracy',
                             path_to_save='./output/MNIST/')
        save_variable(error, 'error', path_to_save='./output/MNIST/')
        save_np_array_to_txt(np.asarray(error),
                             'error',
                             path_to_save='./output/MNIST/')
        # --- report results:
        print(' ')
        print('Accuracy: ', accuracy * 100)
        print(' ')
        print('Error: ', error * 100)
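A minimal sketch of the two out-of-sample patterns used above: estimators with a transform() (PCA, KernelPCA, Isomap, LLE, LDA) are fitted on the training split only, while MDS, t-SNE, and the Laplacian eigenmap expose no transform() for unseen points, so train and test are embedded jointly and split afterwards:

import numpy as np

def embed_train_test(clf, X_train, X_test, supports_transform):
    if supports_transform:
        clf.fit(X_train)
        return clf.transform(X_train), clf.transform(X_test)
    # No out-of-sample transform: embed everything at once, then split.
    X_all = clf.fit_transform(np.vstack([X_train, X_test]))
    return X_all[:X_train.shape[0]], X_all[X_train.shape[0]:]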
Example #7
def doLLE(data):
    lle = LLE(n_components=12)
    lle_data = lle.fit_transform(data)
    return lle_data
Example #8
    args = get_args()
    args.sub_question = [int(i) for i in args.sub_question]

    X_train, Y_train, X_test, Y_test = get_data()

    if 1 in args.sub_question:

        pca = PCA(n_components=2)
        X_PCA = pca.fit_transform(X_train)
        show_data(X_PCA, Y_train, 'PCA')

        isomap = Isomap(n_components=2)
        X_Isomap = isomap.fit_transform(X_train)
        show_data(X_Isomap, Y_train, 'Isomap')

        lle = LLE(n_components=2)
        X_LLE = lle.fit_transform(X_train)
        show_data(X_LLE, Y_train, 'LLE')

        tsne = TSNE(n_components=2)
        X_TSNE = tsne.fit_transform(X_train)
        show_data(X_TSNE, Y_train, 'tSNE')

    if 2 in args.sub_question:
        f_nums = [1, 10, 20, 50, 100, 300]

        for num in f_nums:
            pca = PCA(n_components=num)
            X_PCA = pca.fit_transform(np.concatenate([X_train, X_test]))
            X_PCA_train = X_PCA[:X_train.shape[0]]
            X_PCA_test = X_PCA[X_train.shape[0]:]
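Note that fitting PCA on the concatenation of train and test lets the test distribution influence the learned components. A leakage-free variant fits on the training split only:

        for num in f_nums:
            pca = PCA(n_components=num)
            X_PCA_train = pca.fit_transform(X_train)
            X_PCA_test = pca.transform(X_test)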
Example #9
# xdata = data3D.transpose()[1]
# ydata = data3D.transpose()[2]
p3D = plt.axes(projection='3d')
xdata, ydata, zdata = tuple(data3D.transpose())
p3D.scatter3D(xdata, ydata, zdata, c=zdata, cmap='Reds')
plt.show()

pca = PCA(copy=True, n_components=2)
data2D_pca = pca.fit_transform(data3D)
plt.scatter(*tuple(data2D_pca.transpose()), c="red")
plt.title("sklearn PCA")
plt.show()

data2D_pca_mine = myPCA.fit(data3D, dim_goal=2)
plt.scatter(*tuple(data2D_pca_mine.transpose()), c="pink")
plt.title("my PCA")
plt.show()

data3D = _data3D.copy()
# print(data3D)
mds = MDS(n_components=2)
data2D_mds = mds.fit_transform(data3D)
# print(data2D_mds)
plt.scatter(*tuple(data2D_mds.transpose()), c="blue")
plt.title("sklearn MDS")
plt.show()

data3D = _data3D.copy()
lle = LLE(n_neighbors=7, n_components=2)
data2D_lle = lle.fit_transform(data3D)
Example #10

#  _____  _             _____          _            _   _
# |  __ \(_)           |  __ \        | |          | | (_)
# | |  | |_ _ __ ___   | |__) |___  __| |_   _  ___| |_ _  ___  _ __
# | |  | | | '_ ` _ \  |  _  // _ \/ _` | | | |/ __| __| |/ _ \| '_ \
# | |__| | | | | | | | | | \ \  __/ (_| | |_| | (__| |_| | (_) | | | |
# |_____/|_|_| |_| |_| |_|  \_\___|\__,_|\__,_|\___|\__|_|\___/|_| |_|
#

################### Reduce dimensions ########################
off_f_red = LLE(n_neighbors=50,
                n_components=3).fit_transform(np.transpose(all_off_f))
#off_f_red = LLE(n_neighbors = 50,n_components=3).fit_transform(np.transpose(off_firing[0]))
#off_f_red = TSNE(n_components=3).fit_transform(np.transpose(all_off_f))

## 3D Plot for single trajectory
for i in range(4):
    fig = plt.figure()
    ax = Axes3D(fig)
    trial_len = int((tot_time - window_size) / step_size) - 1
    ran_inds = np.arange((trial_len * i), (trial_len * (i + 1)))
    # this_cmap = Colormap('hsv')  # unused; the scatter below passes cmap='hsv' directly
    p = ax.scatter(off_f_red[ran_inds, 0],
                   off_f_red[ran_inds, 1],
                   off_f_red[ran_inds, 2],
                   c=np.linspace(1, 255, len(ran_inds)),
                   cmap='hsv')
Example #11
model = MDS(n_components=2, random_state=2)
outS = model.fit_transform(XS)
plt.scatter(outS[:, 0], outS[:, 1], **colorize)
plt.axis('equal')
show()

#we lost the y axis instead of unwrapping the roll

#Nonlinear manifolds: Locally Linear Embedding
#LLE preserves only the distances between nearby points,
#so we can use it to unwrap our data

from sklearn.manifold import LocallyLinearEmbedding as LLE

model = LLE(n_neighbors=100, n_components=2, eigen_solver='dense')
out = model.fit_transform(XS)

fig, ax = plt.subplots()
ax.scatter(out[:, 0], out[:, 1], **colorize)
ax.set_ylim(0.15, -0.15)
show()  #pretty close to the original

print("isomaps")
#example: isomaps on faces

#get the data
from sklearn.datasets import fetch_lfw_people

faces = fetch_lfw_people(min_faces_per_person=30)
#faces.data has shape (2370, 2914)
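The excerpt stops after loading the faces; the Isomap step it is building toward would look roughly like this (the component count is an assumption):

from sklearn.manifold import Isomap

model = Isomap(n_components=2)
proj = model.fit_transform(faces.data)  # one 2-D point per face image
fig, ax = plt.subplots()
ax.scatter(proj[:, 0], proj[:, 1], s=2)
show()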
Example #12
def apply_dr(input_file,
             output_folder,
             dataset_name="MNIST",
             dr_name="PCA",
             perplexity=None,
             n_neighbors=None,
             min_dist=None,
             max_samples=5000,
             size=None,
             c=None):
    fn = "{dataset_name}{size}{c}{dr_name}{perp}{neigh}{mindist}".format(
        dataset_name=dataset_name,
        size="_size" + str(size) if size is not None else "",
        c="_c" + str(c) if c is not None else "",
        dr_name="_" + dr_name,
        perp="_p" + str(perplexity) if perplexity is not None else "",
        neigh="_n" + str(n_neighbors) if n_neighbors is not None else "",
        mindist="_d" + str(min_dist) if min_dist is not None else "",
    )

    if os.path.exists(output_folder + fn + ".csv"):
        print("---------Skipping: {}{}-----------".format(input_file, fn))
        return

    try:
        df = pd.read_csv(input_file)
        print("---------Starting: {} - {}-----------".format(input_file, fn))
    except FileNotFoundError:
        print("{} - does not exist".format(fn))
        return

    y = df["labels"]
    X = df.iloc[:, :-2]

    if df.shape[0] > max_samples:
        # train_test_split is used only to draw a stratified subsample of
        # max_samples rows; the complementary "train" part is discarded.
        _, features, _, labels = train_test_split(
            X, y, test_size=max_samples, random_state=42, stratify=y)
    else:
        features = X
        labels = y

    idx = list(features.index)
    filename = df.loc[idx, "filename"]
    ########

    ## apply dr
    if dr_name == "CPCA":
        dr = CPCA(n_components=2)

    if dr_name == "PCA":
        dr = PCA(n_components=2)

    elif dr_name == "TSNE":
        dr = TSNE(n_components=2, perplexity=perplexity, verbose=0)

    elif dr_name == "ISM":
        dr = Isomap(n_components=2, n_neighbors=n_neighbors)

    elif dr_name == "LLE":
        dr = LLE(n_components=2, n_neighbors=n_neighbors)

    elif dr_name == "SE":
        dr = SE(n_components=2, n_neighbors=n_neighbors)

    elif dr_name == "UMAP":
        dr = umap.UMAP(n_components=2,
                       n_neighbors=n_neighbors,
                       verbose=False,
                       min_dist=min_dist)

    elif dr_name == "GRP":
        dr = GRP(n_components=2)

    elif dr_name == "MDS":
        dr = MDS(n_components=2)

    try:
        dr_data = dr.fit_transform(features)
    except Exception:
        # Some methods fail on degenerate inputs; skip this configuration.
        return

    dr_data = pd.DataFrame(
        dr_data, columns=["{}_1".format(dr_name), "{}_2".format(dr_name)])
    dr_data.index = idx

    ## save stuff
    if labels is not None:
        dr_data["labels"] = list(labels)
        dr_data["filename"] = list(filename)

        # fig, ax = plt.subplots()
        # sns.scatterplot(dr_data['{}_1'.format(dr_name)], dr_data['{}_2'.format(dr_name)], hue = dr_data['labels'], ax=ax)
        # plt.savefig(dataset_name + '/figures/1_' + fn +'.pdf')
        # plt.close('all')

    dr_data.to_csv(output_folder + fn + ".csv", index=False)
    print(("---------Finished: {}{}-----------".format(dataset_name, fn)))

    return
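A hypothetical invocation of apply_dr; the paths and parameter values below are placeholders, not from the original project:

apply_dr(input_file='./data/mnist_features.csv',
         output_folder='./output/',
         dataset_name='MNIST',
         dr_name='UMAP',
         n_neighbors=15,
         min_dist=0.1)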
def SLLE():
    le = LLE(n_components=3, n_neighbors=14)
    slle = le.fit_transform(ppd)
    km_slle2 = Kmeans_2D(slle, "KM Clustering on 2D Standard LLE.html", 8)
    km_slle3 = Kmeans_3D(slle, "KM Clustering on 3D Standard LLE.html", 8)
    km_lmaps2 = Kmeans_2D(lmaps,
                          "KM Clustering on 2D Laplacian Eigenmaps.html", 8)
    km_lmaps3 = Kmeans_3D(lmaps, "KM Clustering on 3D Laplacian Eigenmaps", 8)


# In[20]:
SLLE()
Iso_map()
Laplacian_eigenmap()

# In[21]:
#to get the best parameters for dimensionality reduction and clustering
nn_check(ppd)
# In[22]:
#Modified LLE
lle = LLE(n_components=5, n_neighbors=8, method='modified', modified_tol=1e-12)
middle = lle.fit_transform(ppd)  # passing adata.X gives different clustering results

# In[23]:
lle = LLE(n_components=3,
          n_neighbors=11,
          method='modified',
          modified_tol=1e-12)
reduced_lle = lle.fit_transform(ppd)
# In[24]:
km_lle2 = Kmeans_2D(reduced_lle, "KM Clustering on 2D Modified LLE.html", 7)
km_lle3 = Kmeans_3D(reduced_lle, "KM Clustering on 3D Modified LLE.html", 7)

# In[25]:
#call ICA function
Example #15
df = df.dropna(axis='columns')
X = df.iloc[:, 5:]
y = df.iloc[:, 0:5]

# Scale
from sklearn import preprocessing
X = preprocessing.scale(X)

# Locally Linear Embedding 2 Components
from sklearn.manifold import LocallyLinearEmbedding as LLE
n_components = 2
neighbors = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

for neighbor in neighbors:  # Produces a dataset for each neighbor count
    embedding = LLE(n_neighbors=neighbor,
                    n_components=n_components,
                    eigen_solver='dense',
                    reg=0.001)
    columns = ["LLE_{}".format(j + 1) for j in range(n_components)]

    X_transformed = pd.DataFrame(embedding.fit_transform(X), columns=columns)

    pc_df = pd.concat([y, X_transformed], axis=1, sort=False)
    pc_df.to_csv('./Data/Reduced DataFrames/LLE/LLE-{}N.csv'.format(neighbor),
                 header=True,
                 index=False)
    print("Round Done: {}".format(neighbor))

# 100 Components
from sklearn.manifold import LocallyLinearEmbedding as LLE
n_components = 100
neighbors = 13
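The snippet cuts off here; a completion that mirrors the loop above for a single 100-component run could look like this (the output filename is a guess, and note that standard LLE with far fewer neighbors than components is ill-conditioned):

embedding = LLE(n_neighbors=neighbors,
                n_components=n_components,
                eigen_solver='dense',
                reg=0.001)
columns = ["LLE_{}".format(j + 1) for j in range(n_components)]
X_transformed = pd.DataFrame(embedding.fit_transform(X), columns=columns)
pc_df = pd.concat([y, X_transformed], axis=1, sort=False)
pc_df.to_csv('./Data/Reduced DataFrames/LLE/LLE-100C.csv', header=True, index=False)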
Example #16
def main():
    # load ORL or load Yale
    xTrain_, yTrain, xTest_, yTest = loadORLImages(u'./att_faces', 5)
    #    xTrain_, yTrain, xTest_, yTest = loadYaleImages()
    # WT+PCA+SVM
    # WT
    xTrain = np.array(wavelet_transform(xTrain_))
    xTest = np.array(wavelet_transform(xTest_))
    #Yale dataset wavelet
    #    xTrain = np.array(wavelet_transform(xTrain_,100,100))
    #    xTest = np.array(wavelet_transform(xTest_,100,100))
    # PCA
    data = np.float32(np.mat(xTrain))
    pca = PCA(n_components=50)
    pca.fit(data)
    xTrain = pca.transform(data)
    print('PCA explained variance ratio: %s' % sum(pca.explained_variance_ratio_))
    xTest = pca.transform(np.float32(np.mat(xTest)))
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('WT+PCA+SVM accuracy: %s' % score)

    # PCA+SVM
    # PCA
    data = np.float32(np.mat(xTrain_))
    pca = PCA(n_components=50)
    pca.fit(data)
    xTrain = pca.transform(data)
    print('PCA explained variance ratio: %s' % sum(pca.explained_variance_ratio_))
    xTest = pca.transform(np.float32(np.mat(xTest_)))
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('PCA+SVM accuracy: %s' % score)

    # LDA+SVM
    #    #%% LDA directly
    #    clf = LDA()
    #    clf.fit(xTrain_, yTrain)
    #    yPredict = clf.predict(xTest_)
    #    print(np.where(yPredict != np.array(yTest)))
    #    print(u'LDA recognition rate: %.2f%%' % ((yPredict == np.array(yTest)).mean()*100))

    # use for feature extraction
    clf = LDA(n_components=50)
    clf.fit(xTrain_, yTrain)
    xTrain = clf.transform(xTrain_)  # xTrain is the dimensionality-reduced data
    xTest = clf.transform(xTest_)
    # print('LDA class centers:', clf.means_)  # centroids
    print('LDA classification accuracy:', clf.score(xTest_, yTest))  # score is the classification accuracy
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('LDA+SVM accuracy: %s' % score)

    # LLE+SVM
    from sklearn.manifold import LocallyLinearEmbedding as LLE
    lle = LLE(n_neighbors=30, n_components=50, method='standard')
    lle.fit(xTrain_)
    xTrain = lle.transform(xTrain_)
    xTest = lle.transform(xTest_)
    #    trans_data,err = lle.fit_transform(xTrain_)
    #    print("LLE Done. Reconstruction error: %g" % err)
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('LLE+SVM accuracy: %s' % score)
Example #17
def main(args):
    outputdir = os.path.dirname(args.vectors)
    #winidx_path = os.path.join(outputdir,
    #    'cos-distance_' + os.path.basename(args.weights))
    point_path = os.path.splitext(args.vectors)[0] + \
        '_{0}_{1}d-points_it{2}_s{3}.txt'.format(
        args.algorithm, args.components, args.iteration, args.samples)
    fig_path = os.path.splitext(args.vectors)[0] + \
        '_{0}_it{1}_s{2}.eps'.format(args.algorithm, args.iteration, args.samples)

    print('loading val...')
    val = utils.io.load_image_list(args.val)
    categories = utils.io.load_categories(args.categories)

    v = np.load(args.vectors)
    N = v.shape[0]
    d = v.shape[1]
    C = len(categories)
    NperC = N // C

    samples_per_c = args.samples
    random_order = np.random.permutation(NperC)
    selected_vectors = []
    selected_images = []
    Ys = []
    for i in range(C):
        selected_vectors.extend(
            [v[i * NperC + ii] for ii in random_order[:samples_per_c]])
        selected_images.extend(
            [val[i * NperC + ii][0] for ii in random_order[:samples_per_c]])
        Ys.extend(
            [val[i * NperC + ii][1] for ii in random_order[:samples_per_c]])

    #print(selected_vectors)
    #print(Ys)
    if args.algorithm == 'tsne':
        model = utils.TSNE(n_components=args.components,
                           n_iter=args.iteration,
                           n_iter_without_progress=args.preprocessdim,
                           angle=args.angle,
                           metric=args.metric)
    elif args.algorithm == 'mds':
        model = MDS(n_components=args.components, n_jobs=-1)
    elif args.algorithm == 'lle':
        model = LLE(n_components=args.components,
                    n_neighbors=args.neighbors,
                    n_jobs=-1)
    elif args.algorithm == 'isomap':
        model = Isomap(n_components=args.components,
                       n_neighbors=args.neighbors,
                       n_jobs=-1)
    elif args.algorithm == 'pca':
        model = PCA(n_components=args.components)
    #X = model.fit_transform(v[:23*10])
    print('fitting...')
    X = model.fit_transform(np.array(selected_vectors))
    Y = np.asarray([x[1] for x in val])

    if args.algorithm == 'pca':
        pca = PCA(n_components=100)
        pca.fit(np.array(selected_vectors))
        E = pca.explained_variance_ratio_
        print "explained", E
        print "cumsum E", np.cumsum(E)

    print('drawing...')

    markers = ['o', 'x', 'v', '+']

    if args.components == 2:
        plt.figure(2, figsize=(8, 6))
        plt.clf()

        #plt.scatter(X[:, 0], X[:, 1], c=Y[:23*10], cmap=plt.cm.jet)
        #plt.scatter(X[:, 0], X[:, 1], c=np.array(Ys), cmap=plt.cm.jet, label=categories)

        for i in range(C):
            plt.scatter(X[samples_per_c * i:samples_per_c * (i + 1), 0],
                        X[samples_per_c * i:samples_per_c * (i + 1), 1],
                        marker=markers[i % len(markers)],
                        s=10,
                        color=plt.cm.jet(float(i) / (C - 1)),
                        label=categories[i])
        plt.xlabel(args.algorithm + '1')
        plt.ylabel(args.algorithm + '2')
        plt.legend(fontsize=10.25,
                   scatterpoints=1,
                   bbox_to_anchor=(1.05, 1.01),
                   loc='upper left')
        plt.subplots_adjust(right=0.7)
        #plt.show()
        plt.savefig(fig_path)
    elif args.components == 3:
        from mpl_toolkits.mplot3d import Axes3D
        fig = plt.figure()
        ax = Axes3D(fig)
        ax.set_xlabel("X-axis")
        ax.set_ylabel("Y-axis")
        ax.set_zlabel("Z-axis")
        for i in range(C):
            ax.scatter(X[samples_per_c * i:samples_per_c * (i + 1), 0],
                       X[samples_per_c * i:samples_per_c * (i + 1), 1],
                       X[samples_per_c * i:samples_per_c * (i + 1), 2],
                       marker=markers[i % len(markers)],
                       s=10,
                       c=plt.cm.jet(float(i) / (C - 1)),
                       label=categories[i])
        plt.show()

    print(model.get_params())
    # save points
    with open(point_path, 'w') as fp:
        for path, t, p in zip(selected_images, Ys, X):
            fp.write("{0}\t{1}\t{2}\n".format(path, t, '\t'.join(map(str, p))))