예제 #1
0
def reduction(data, params):
    """Reduce the dimensionality of ``data`` with Isomap.

    Parameters
    ----------
    data
        Input samples, handed unchanged to ``Isomap.fit_transform``.
    params : dict
        Keyword arguments for ``manifold.Isomap``.  Only the names listed
        in ``possible_keys`` are accepted; every other key is reported
        through ``ERROR``.  Keys that are absent are simply not passed,
        which leaves the estimator's own defaults in effect.

    Returns
    -------
    The embedding returned by ``fit_transform``.
    """

    # parse parameters
    #
    # Values are collected in a dict and forwarded as keyword arguments.
    # Building assignment statements for exec() cannot create function
    # locals on Python 3, and it would execute any code smuggled in through
    # a string value.

    possible_keys = [
        'n_neighbors',
        'n_components',
        'eigen_solver',
        'tol',
        'max_iter',
        'path_method',
        'neighbors_algorithm',
    ]
    isomap_kwargs = {}
    for item in params:
        if item not in possible_keys:
            # ERROR is defined outside this block; should it return, the
            # unknown key is dropped instead of reaching the estimator.
            ERROR(item)
            continue
        isomap_kwargs[item] = params[item]

    # apply ISOMAP

    return manifold.Isomap(**isomap_kwargs).fit_transform(data)
예제 #2
0
def clustering(data, params):
    """Cluster ``data`` with agglomerative clustering.

    Parameters
    ----------
    data
        Samples to cluster.  It is passed to ``fit`` and afterwards indexed
        with a boolean mask per cluster, so it must support that kind of
        indexing.
    params : dict
        Keyword arguments for ``AgglomerativeClustering``.  Only the names
        in ``possible_keys`` are accepted; any other key is reported
        through ``ERROR``.  Absent keys are not passed, so the estimator's
        defaults apply.

    Returns
    -------
    list
        ``[cluster_centers, labels]`` where ``cluster_centers`` holds the
        mean of the members of each cluster and ``labels`` is the fitted
        estimator's ``labels_``.
    """

    # parse parameters
    #
    # Values are collected in a dict and forwarded as keyword arguments.
    # Building assignment statements for exec() cannot create function
    # locals on Python 3, and it would execute any code smuggled in through
    # a string value.

    possible_keys = [
        'n_clusters',
        'affinity',
        'linkage',
    ]
    cluster_kwargs = {}
    for item in params:
        if item not in possible_keys:
            # ERROR is defined outside this block; should it return, the
            # unknown key is dropped instead of reaching the estimator.
            ERROR(item)
            continue
        cluster_kwargs[item] = params[item]

    # apply Agglomerative Clustering to reduced data

    # NOTE(review): recent scikit-learn releases renamed ``affinity`` to
    # ``metric``; confirm against the installed version.
    clusters = AgglomerativeClustering(**cluster_kwargs)
    clusters.fit(data)
    labels = clusters.labels_

    # When the caller did not fix the number of clusters, count the
    # distinct labels that the fit produced.
    n_clusters = cluster_kwargs.get('n_clusters')
    if n_clusters is None:
        n_clusters = len(set(labels))

    # Agglomerative Clustering does not give centers of clusters
    # so lets try the mean of each cluster

    cluster_centers = array(
        [mean(data[labels == i], axis=0) for i in range(n_clusters)])

    return [cluster_centers, labels]
예제 #3
0
def clustering(data, params):
    """Cluster ``data`` with k-means.

    Parameters
    ----------
    data
        Samples to cluster, handed unchanged to ``KMeans.fit``.
    params : dict
        Options for the clustering.  Only the names in ``possible_keys``
        are accepted; any other key is reported through ``ERROR``.
        ``n_clusters``, ``init`` and ``tol`` are forwarded to ``KMeans``;
        ``n_jobs`` is accepted but not forwarded.  Absent keys are not
        passed, so the estimator's defaults apply.

    Returns
    -------
    list
        ``[cluster_centers_, labels_]`` of the fitted estimator.
    """

    # parse arguments
    #
    # Values are collected in a dict and forwarded as keyword arguments.
    # Building assignment statements for exec() cannot create function
    # locals on Python 3, and it would execute any code smuggled in through
    # a string value.

    possible_keys = ['n_clusters', 'tol', 'init', 'n_jobs']
    forwarded_keys = ('n_clusters', 'init', 'tol')
    kmeans_kwargs = {}
    for item in params:
        if item not in possible_keys:
            # ERROR is defined outside this block; should it return, the
            # unknown key is dropped instead of reaching the estimator.
            ERROR(item)
            continue
        if item in forwarded_keys:
            kmeans_kwargs[item] = params[item]

    # apply KMeans to reduced data

    clusters = KMeans(**kmeans_kwargs)
    clusters.fit(data)

    return [clusters.cluster_centers_, clusters.labels_]
예제 #4
0
def clustering(data, params):
    """Cluster ``data`` with mean shift.

    Parameters
    ----------
    data
        Samples to cluster; used both for the bandwidth estimate and for
        ``MeanShift.fit``.
    params : dict
        Options for the clustering.  Only the names in ``possible_keys``
        are accepted; any other key is reported through ``ERROR``.
        ``quantile`` goes to ``estimate_bandwidth`` and ``cluster_all`` to
        ``MeanShift``.  Absent keys are not passed, so the library
        defaults apply.

    Returns
    -------
    list
        ``[cluster_centers_, labels_]`` of the fitted estimator.
    """

    # parse arguments
    #
    # Values are collected in dicts and forwarded as keyword arguments.
    # Building assignment statements for exec() cannot create function
    # locals on Python 3, and it would execute any code smuggled in through
    # a string value.

    possible_keys = ['quantile', 'cluster_all']
    bandwidth_kwargs = {}
    meanshift_kwargs = {}
    for item in params:
        if item not in possible_keys:
            # ERROR is defined outside this block; should it return, the
            # unknown key is dropped instead of reaching the estimator.
            ERROR(item)
            continue
        if item == 'quantile':
            bandwidth_kwargs[item] = params[item]
        else:
            meanshift_kwargs[item] = params[item]

    # apply Mean Shift to reduced data

    bandwidth = estimate_bandwidth(data, **bandwidth_kwargs)
    # bandwidth is passed by keyword so the call does not depend on the
    # position of the argument in the estimator's signature.
    clusters = MeanShift(bandwidth=bandwidth, **meanshift_kwargs)
    clusters.fit(data)

    return [clusters.cluster_centers_, clusters.labels_]
예제 #5
0
def reduction(data, params):
    """Reduce the dimensionality of ``data`` with PCA.

    Parameters
    ----------
    data
        Input samples; used to fit the PCA and then transformed by it.
    params : dict
        Options for the reduction.  The number of components may be given
        as ``components`` or as ``n_components``; both are forwarded to
        ``PCA`` as ``n_components``, and if both are present
        ``n_components`` wins.  Any other key is reported through
        ``ERROR``.  When neither is given, ``PCA`` is built with its own
        defaults.

    Returns
    -------
    The result of ``pca.transform(data)``.
    """

    # parse parameters
    #
    # The previous version accepted only the key 'components' but then read
    # a variable called n_components, so no call could succeed.  Both
    # spellings are accepted here and mapped onto the estimator's
    # n_components argument.  The value is taken straight from the dict:
    # exec()-built assignments cannot create function locals on Python 3
    # and would execute code embedded in string values.

    possible_keys = [
        'components',
        'n_components',
    ]
    pca_kwargs = {}
    for item in params:
        if item not in possible_keys:
            # ERROR is defined outside this block; should it return, the
            # unknown key is dropped instead of reaching the estimator.
            ERROR(item)
            continue
        if item == 'n_components' or 'n_components' not in params:
            pca_kwargs['n_components'] = params[item]

    # apply PCA

    pca = PCA(**pca_kwargs)
    pca.fit(data)

    return pca.transform(data)
예제 #6
0
def reduction(data, params):
    """Reduce the dimensionality of ``data`` with EMPCA.

    Parameters
    ----------
    data
        Input samples.  The mean along axis 0 is subtracted before the
        data are handed to ``empca``.
    params : dict
        Options for the reduction.  Only the names in ``possible_keys``
        are accepted; any other key is reported through ``ERROR``.

        * ``data_errors_file`` -- path of a file readable by ``loadtxt``
          holding the errors; the weights are ``1 / errors**2``.  When the
          key is absent or ``None`` the weights are ``None``.
        * ``n_components``, ``smooth``, ``n_iter`` -- forwarded to
          ``empca`` as ``nvec``, ``smooth`` and ``niter``.  Absent keys
          are not passed (this assumes ``empca`` has defaults for them --
          TODO confirm).

    Returns
    -------
    The ``coeff`` attribute of the object returned by ``empca``.
    """

    # parse arguments
    #
    # Values are read straight from the dict.  Building assignment
    # statements for exec() cannot create function locals on Python 3, and
    # it would execute any code smuggled in through a string value.

    possible_keys = [
        'data_errors_file',
        'n_components',
        'smooth',
        'n_iter',
    ]
    # names used by this module -> keyword names of empca
    empca_names = {
        'n_components': 'nvec',
        'smooth': 'smooth',
        'n_iter': 'niter',
    }
    empca_kwargs = {}
    data_errors_file = None
    for item in params:
        if item not in possible_keys:
            # ERROR is defined outside this block; should it return, the
            # unknown key is dropped instead of reaching empca.
            ERROR(item)
            continue
        if item == 'data_errors_file':
            data_errors_file = params[item]
        else:
            empca_kwargs[empca_names[item]] = params[item]

    # read errors file name

    # The accepted key is 'data_errors_file'; the earlier code tested an
    # undefined name (errors_file) here and therefore always failed.
    if data_errors_file is None:
        weights = None
    else:
        errors = loadtxt(data_errors_file)
        weights = 1. / (errors)**2

    # apply EMPCA

    centered_der = data - mean(data, 0)
    m = empca(centered_der, weights, silent=True, **empca_kwargs)

    return m.coeff
예제 #7
0
def clustering(data, params):
    """Cluster ``data`` with DBSCAN.

    Parameters
    ----------
    data
        Samples to cluster.  It is passed to ``fit`` and afterwards indexed
        with a boolean mask per cluster, so it must support that kind of
        indexing.
    params : dict
        Keyword arguments for ``DBSCAN``.  Only the names in
        ``possible_keys`` are accepted; any other key is reported through
        ``ERROR``.  Absent keys are not passed, so the estimator's
        defaults apply.

    Returns
    -------
    list
        ``[cluster_centers, labels]`` where ``cluster_centers`` holds the
        mean of the members of each cluster (the label ``-1`` is not
        counted as a cluster) and ``labels`` is the fitted estimator's
        ``labels_``.
    """

    # parse parameters
    #
    # Values are collected in a dict and forwarded as keyword arguments.
    # Building assignment statements for exec() cannot create function
    # locals on Python 3, and it would execute any code smuggled in through
    # a string value.

    possible_keys = [
        'eps',
        'min_samples',
        'metric',
        'algorithm',
        'leaf_size',
    ]
    dbscan_kwargs = {}
    for item in params:
        if item not in possible_keys:
            # ERROR is defined outside this block; should it return, the
            # unknown key is dropped instead of reaching the estimator.
            ERROR(item)
            continue
        dbscan_kwargs[item] = params[item]

    # apply DBSCAN to reduced data

    clusters = DBSCAN(**dbscan_kwargs).fit(data)
    labels = clusters.labels_
    # the label -1 is left out of the cluster count
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

    # DBSCAN does not give centers of clusters
    # so lets try the mean of each cluster

    cluster_centers = array(
        [mean(data[labels == i], axis=0) for i in range(n_clusters)])

    return [cluster_centers, labels]
예제 #8
0
from rpy2.robjects import r
import numpy as np
import os,sys
from aux import ERROR

def reduction(data, params):

	in_param	= ''
    # parse parameters

    possible_keys=['n_layers','training_frame','activation','autoencoder','hidden','epochs','ignore_const_cols',]
    for item in params:
	    if item not in possible_keys: ERROR(item)
	    if isinstance(params[item], str):
	        in_param += ', '+item+' = '+params[item]
	    else:
	        exec(item+'='+str(params[item]))

    # apply PCA

	
	size_data 	= np.shape(data)[1]
	in_param	= 'x=1:%d'%(size_data) + ''.join([ ', '+item+' = '+params[item] for item in params if isinstance(params[item], str)])

#	print('')
#	print(in_param[9:])
#	print(in_param)
#	print('\n')
	
	
	current_path	= os.path.abspath('')