def reduction(data, params):
    """Reduce `data` to a low-dimensional embedding with Isomap.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        Samples to embed.
    params : dict
        Isomap keyword arguments; only keys in `possible_keys` are accepted.
        Unknown keys are reported via ERROR(). Missing keys fall back to the
        sklearn defaults.

    Returns
    -------
    X : ndarray
        The Isomap-transformed data.
    """
    # Keyword arguments this wrapper is willing to forward to Isomap.
    possible_keys = [
        'n_neighbors',
        'n_components',
        'eigen_solver',
        'tol',
        'max_iter',
        'path_method',
        'neighbors_algorithm',
    ]
    # Reject unknown parameter names up front.
    for item in params:
        if item not in possible_keys:
            ERROR(item)
    # BUG FIX: the original used exec(item + '= ...') to create local
    # variables; in Python 3 exec() cannot bind function locals, so every
    # later reference raised NameError (and exec on config values is unsafe).
    # Forward the validated dict as keyword arguments instead.
    kwargs = {key: params[key] for key in possible_keys if key in params}
    # apply ISOMAP
    X = manifold.Isomap(**kwargs).fit_transform(data)
    return X
def clustering(data, params):
    """Cluster `data` with AgglomerativeClustering.

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_features)
        Samples to cluster (typically the output of a reduction step).
    params : dict
        AgglomerativeClustering keyword arguments; only keys in
        `possible_keys` are accepted, unknown keys go to ERROR().

    Returns
    -------
    list
        [cluster_centers, labels] — centers are per-cluster means, since
        AgglomerativeClustering exposes no cluster_centers_ attribute.
    """
    possible_keys = [
        'n_clusters',
        'affinity',
        'linkage',
    ]
    # Validate parameter names.
    for item in params:
        if item not in possible_keys:
            ERROR(item)
    # BUG FIX: exec()-based binding of locals does not work in Python 3
    # (exec cannot create function locals) -> pass the validated dict as
    # keyword arguments instead.
    kwargs = {key: params[key] for key in possible_keys if key in params}
    # apply Agglomerative Clustering to reduced data
    clusters = AgglomerativeClustering(**kwargs)
    clusters.fit(data)
    # Agglomerative Clustering does not give centers of clusters,
    # so use the mean of each cluster's members. Labels are 0..k-1,
    # so the number of clusters is the number of distinct labels.
    labels = clusters.labels_
    n_clusters = len(set(labels))
    cluster_centers = []
    for i in range(n_clusters):
        mask = (labels == i)
        cluster_centers.append(mean(data[mask], axis=0))
    cluster_centers = array(cluster_centers)
    return [cluster_centers, labels]
def clustering(data, params):
    """Cluster `data` with KMeans.

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_features)
        Samples to cluster.
    params : dict
        Accepted keys: 'n_clusters', 'tol', 'init', 'n_jobs'.
        Unknown keys are reported via ERROR().

    Returns
    -------
    list
        [cluster_centers_, labels_] from the fitted KMeans model.
    """
    possible_keys = ['n_clusters', 'tol', 'init', 'n_jobs']
    # Validate parameter names.
    for item in params:
        if item not in possible_keys:
            ERROR(item)
    # BUG FIX: exec()-based binding of locals does not work in Python 3
    # (exec cannot create function locals) -> build a kwargs dict instead.
    # NOTE: 'n_jobs' is still accepted for backward compatibility but is
    # deliberately not forwarded — the original never passed it to KMeans,
    # and recent scikit-learn versions have removed the parameter.
    kwargs = {key: params[key] for key in ('n_clusters', 'init', 'tol')
              if key in params}
    # apply KMeans to reduced data
    clusters = KMeans(**kwargs)
    clusters.fit(data)
    return [clusters.cluster_centers_, clusters.labels_]
def clustering(data, params):
    """Cluster `data` with MeanShift, bandwidth estimated from the data.

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_features)
        Samples to cluster.
    params : dict
        Accepted keys: 'quantile' (forwarded to estimate_bandwidth) and
        'cluster_all' (forwarded to MeanShift). Unknown keys -> ERROR().

    Returns
    -------
    list
        [cluster_centers_, labels_] from the fitted MeanShift model.
    """
    possible_keys = ['quantile', 'cluster_all']
    # Validate parameter names.
    for item in params:
        if item not in possible_keys:
            ERROR(item)
    # BUG FIX: exec()-based binding of locals does not work in Python 3
    # (exec cannot create function locals) -> forward dict entries directly.
    bw_kwargs = {}
    if 'quantile' in params:
        bw_kwargs['quantile'] = params['quantile']
    ms_kwargs = {}
    if 'cluster_all' in params:
        ms_kwargs['cluster_all'] = params['cluster_all']
    # apply Mean Shift to reduced data
    bandwidth = estimate_bandwidth(data, **bw_kwargs)
    clusters = MeanShift(bandwidth, **ms_kwargs)
    clusters.fit(data)
    return [clusters.cluster_centers_, clusters.labels_]
def reduction(data, params):
    """Reduce `data` with PCA.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        Samples to reduce.
    params : dict
        Number of components, under key 'n_components' (or the historical
        alias 'components'). Unknown keys are reported via ERROR(). If
        neither key is given, the sklearn PCA default is used.

    Returns
    -------
    X : ndarray
        The PCA-transformed data.
    """
    # BUG FIX: the original accepted only the key 'components' yet read a
    # variable named n_components -> NameError on any call. Accept both
    # spellings (backward-compatible widening), forward as n_components.
    possible_keys = ['components', 'n_components']
    for item in params:
        if item not in possible_keys:
            ERROR(item)
    # BUG FIX: exec()-based binding of locals does not work in Python 3
    # (exec cannot create function locals) -> read the dict directly.
    kwargs = {}
    if 'n_components' in params:
        kwargs['n_components'] = params['n_components']
    elif 'components' in params:
        kwargs['n_components'] = params['components']
    # apply PCA
    pca = PCA(**kwargs)
    pca.fit(data)
    X = pca.transform(data)
    return X
def reduction(data, params):
    """Reduce `data` with weighted EMPCA.

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_features)
        Samples to reduce; mean-centered before the fit.
    params : dict
        Accepted keys: 'data_errors_file' (path to a loadtxt-readable file
        of per-element errors; omitted or None -> unweighted fit),
        'n_components' (-> empca nvec), 'smooth', 'n_iter' (-> empca niter).
        Unknown keys are reported via ERROR().

    Returns
    -------
    X : ndarray
        The EMPCA coefficients (m.coeff).
    """
    possible_keys = [
        'data_errors_file',
        'n_components',
        'smooth',
        'n_iter',
    ]
    # Validate parameter names.
    for item in params:
        if item not in possible_keys:
            ERROR(item)
    # BUG FIX: the original read a variable 'errors_file' while the accepted
    # key is 'data_errors_file' (and exec() cannot create function locals in
    # Python 3) -> read the validated key directly; also use `is None`.
    errors_file = params.get('data_errors_file')
    if errors_file is None:
        weights = None
    else:
        # Inverse-variance weights from the per-element errors.
        errors = loadtxt(errors_file)
        weights = 1. / (errors)**2
    # apply EMPCA on mean-centered data
    centered_der = data - mean(data, 0)
    # Forward only the empca options that were actually supplied, so the
    # empca defaults apply otherwise.
    empca_kwargs = {}
    if 'n_components' in params:
        empca_kwargs['nvec'] = params['n_components']
    if 'smooth' in params:
        empca_kwargs['smooth'] = params['smooth']
    if 'n_iter' in params:
        empca_kwargs['niter'] = params['n_iter']
    m = empca(centered_der, weights, silent=True, **empca_kwargs)
    X = m.coeff
    return X
def clustering(data, params):
    """Cluster `data` with DBSCAN.

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_features)
        Samples to cluster.
    params : dict
        DBSCAN keyword arguments; only keys in `possible_keys` are accepted,
        unknown keys go to ERROR(). Missing keys use the sklearn defaults.

    Returns
    -------
    list
        [cluster_centers, labels] — centers are per-cluster means, since
        DBSCAN exposes no cluster_centers_ attribute. Noise points
        (label -1) are excluded from the centers but kept in `labels`.
    """
    possible_keys = [
        'eps',
        'min_samples',
        'metric',
        'algorithm',
        'leaf_size',
    ]
    # Validate parameter names.
    for item in params:
        if item not in possible_keys:
            ERROR(item)
    # BUG FIX: exec()-based binding of locals does not work in Python 3
    # (exec cannot create function locals) -> pass the validated dict as
    # keyword arguments instead.
    kwargs = {key: params[key] for key in possible_keys if key in params}
    # apply DBSCAN to reduced data
    clusters = DBSCAN(**kwargs).fit(data)
    labels = clusters.labels_
    # DBSCAN labels noise as -1; don't count it as a cluster.
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    # DBSCAN does not give centers of clusters,
    # so use the mean of each cluster's members.
    cluster_centers = []
    for i in range(n_clusters):
        mask = (labels == i)
        cluster_centers.append(mean(data[mask], axis=0))
    cluster_centers = array(cluster_centers)
    return [cluster_centers, labels]
from rpy2.robjects import r import numpy as np import os,sys from aux import ERROR def reduction(data, params): in_param = '' # parse parameters possible_keys=['n_layers','training_frame','activation','autoencoder','hidden','epochs','ignore_const_cols',] for item in params: if item not in possible_keys: ERROR(item) if isinstance(params[item], str): in_param += ', '+item+' = '+params[item] else: exec(item+'='+str(params[item])) # apply PCA size_data = np.shape(data)[1] in_param = 'x=1:%d'%(size_data) + ''.join([ ', '+item+' = '+params[item] for item in params if isinstance(params[item], str)]) # print('') # print(in_param[9:]) # print(in_param) # print('\n') current_path = os.path.abspath('')