def distance_director_euclidian_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2): from shogun.Distance import EuclidianDistance from modshogun import Time feats_train=RealFeatures(fm_train_real) feats_train.io.set_loglevel(0) feats_train.parallel.set_num_threads(1) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance() distance.init(feats_train, feats_test) ddistance=DirectorEuclidianDistance() ddistance.init(feats_train, feats_test) print "dm_train" t=Time() dm_train=distance.get_distance_matrix() t1=t.cur_time_diff(True) print "ddm_train" t=Time() ddm_train=ddistance.get_distance_matrix() t2=t.cur_time_diff(True) print "dm_train", dm_train print "ddm_train", ddm_train return dm_train, ddm_train
def euclidian_distance (): print 'EuclidianDistance' from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix()
def distance_euclidian_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test
def assign_labels(data, centroids): from shogun.Classifier import KNN from numpy import arange labels = Labels(arange(1., 11.)) fea = RealFeatures(data) fea_centroids = RealFeatures(centroids) distance = EuclidianDistance(fea_centroids, fea_centroids) knn = KNN(1, distance, labels) knn.train() return knn.apply(fea)
def run_clustering(data, k): from shogun.Clustering import KMeans from shogun.Mathematics import Math_init_random Math_init_random(42) fea = RealFeatures(data) distance = EuclidianDistance(fea, fea) kmeans = KMeans(k, distance) print("Running clustering...") kmeans.train() return kmeans.get_cluster_centers()
def converter_multidimensionalscaling_modular(data): from shogun.Features import RealFeatures from shogun.Converter import MultidimensionalScaling from shogun.Distance import EuclidianDistance features = RealFeatures(data) distance_before = EuclidianDistance() distance_before.init(features,features) converter = MultidimensionalScaling() converter.set_target_dim(2) converter.set_landmark(False) embedding =converter.apply(features) distance_after = EuclidianDistance() distance_after.init(embedding,embedding) distance_matrix_after = distance_after.get_distance_matrix() distance_matrix_before = distance_before.get_distance_matrix() return numpy.linalg.norm(distance_matrix_after-distance_matrix_before)/numpy.linalg.norm(distance_matrix_before)
def classify_input(points, cdata, transpose): clusteredpoints = list() for i in range(len(cdata[0])): clusteredpoints.append(list()) if transpose == 1: from shogun.Distance import EuclidianDistance from shogun.Features import RealFeatures from numpy import min, where obs = RealFeatures(points) centroids = RealFeatures(cdata) distance = EuclidianDistance(obs, centroids) # out1 = distance.get_distance_matrix() distance.init(obs, centroids) out = distance.get_distance_matrix() labels = list() for i in range(len(out)): # dist = distances[:len(points),i] dist = zip(out)[i] minimum = min(dist) itemindex=where(dist==minimum) labels.append(itemindex[1][0]) print len(points) print len(points[0]) for i in range(len(clusteredpoints)): for types in range(len(points)): clusteredpoints[i].append(list()) for index in range(len(labels)): for item in range(len(points)): clusteredpoints[labels[index]][item].append(points[item][index]) return clusteredpoints, labels
def bench_shogun(X, y, T, valid): # # .. Shogun .. # from shogun.Distance import EuclidianDistance from shogun.Features import RealFeatures from shogun.Clustering import KMeans start = datetime.now() feat = RealFeatures(X.T) distance = EuclidianDistance(feat, feat) clf = KMeans(n_components, distance) clf.train() delta = datetime.now() - start return inertia(X, clf.get_cluster_centers().T), delta
def distance_normsquared_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) distance.set_disable_sqrt(True) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test
def kernel_exponential_modular (fm_train_real=traindat,fm_test_real=testdat, tau_coef=1.0): from shogun.Features import RealFeatures from shogun.Kernel import ExponentialKernel from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance = EuclidianDistance(feats_train, feats_train) kernel=ExponentialKernel(feats_train, feats_train, tau_coef, distance, 10) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def clustering_hierarchical_modular(fm_train=traindat, merges=3): from shogun.Distance import EuclidianDistance from shogun.Features import RealFeatures from shogun.Clustering import Hierarchical feats_train = RealFeatures(fm_train) distance = EuclidianDistance(feats_train, feats_train) hierarchical = Hierarchical(merges, distance) hierarchical.train() out_distance = hierarchical.get_merge_distances() out_cluster = hierarchical.get_cluster_pairs() return hierarchical, out_distance, out_cluster
def kernel_tstudent_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0): from shogun.Features import RealFeatures from shogun.Kernel import TStudentKernel from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) kernel=TStudentKernel(feats_train, feats_train, degree, distance) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def kernel_distance_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.7): from shogun.Kernel import DistanceKernel from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance() kernel=DistanceKernel(feats_train, feats_test, width, distance) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def norm_squared_distance (): from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance print 'EuclidianDistance - NormSquared' feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) distance.set_disable_sqrt(True) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix()
def kernel_multiquadric_modular (fm_train_real=traindat,fm_test_real=testdat, shift_coef=1.0): from shogun.Features import RealFeatures from shogun.Kernel import MultiquadricKernel from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) kernel=MultiquadricKernel(feats_train, feats_train, shift_coef, distance) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def kernel_cauchy_modular (fm_train_real=traindat,fm_test_real=testdat, sigma=1.0): from shogun.Features import RealFeatures from shogun.Kernel import CauchyKernel from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) kernel=CauchyKernel(feats_train, feats_train, sigma, distance) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def classifier_knn_modular(fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat, k=3 ): from shogun.Features import RealFeatures, Labels from shogun.Classifier import KNN from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) labels=Labels(label_train_multiclass) knn=KNN(k, distance, labels) knn_train = knn.train() output=knn.apply(feats_test).get_labels() multiple_k=knn.classify_for_multiple_k() return knn,knn_train,output,multiple_k
def clustering_kmeans_modular(fm_train=traindat, k=3): from shogun.Distance import EuclidianDistance from shogun.Features import RealFeatures from shogun.Clustering import KMeans from shogun.Mathematics import Math_init_random Math_init_random(17) feats_train = RealFeatures(fm_train) distance = EuclidianDistance(feats_train, feats_train) kmeans = KMeans(k, distance) kmeans.train() out_centers = kmeans.get_cluster_centers() kmeans.get_radiuses() return out_centers, kmeans
def converter_multidimensionalscaling_modular(data): from shogun.Features import RealFeatures from shogun.Converter import MultidimensionalScaling from shogun.Distance import EuclidianDistance features = RealFeatures(data) distance_before = EuclidianDistance() distance_before.init(features, features) converter = MultidimensionalScaling() converter.set_target_dim(2) converter.set_landmark(False) embedding = converter.apply(features) distance_after = EuclidianDistance() distance_after.init(embedding, embedding) distance_matrix_after = distance_after.get_distance_matrix() distance_matrix_before = distance_before.get_distance_matrix() return numpy.linalg.norm(distance_matrix_after - distance_matrix_before) / numpy.linalg.norm(distance_matrix_before)