def run_ka(train_varnames, train_labels, test_varnames, test_labels):
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(train_varnames)
    # use transform() here, not fit_transform(): refitting would redraw the
    # random weights and put the test set in a different feature space
    X_features_test = rbf_feature.transform(test_varnames)
    clf = SGDClassifier()
    result, accuracy = fit_predict(clf, "Kernel Approximation", X_features,
                                   train_labels, X_features_test, test_labels)
    return result, accuracy
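# --- Note: a minimal standalone sketch (toy data, illustrative shapes) of the
# fit-on-train / transform-on-test pattern used above. Refitting on the test
# set would redraw the random Fourier weights, so train and test features
# would live in different spaces unless the same random_state were reused.
import numpy as np
from sklearn.kernel_approximation import RBFSampler

X_train = np.random.RandomState(0).rand(20, 4)
X_test = np.random.RandomState(1).rand(5, 4)

sampler = RBFSampler(gamma=1.0, random_state=1)
Z_train = sampler.fit_transform(X_train)  # draws the random weights once
Z_test = sampler.transform(X_test)        # reuses the fitted weights
assert Z_train.shape[1] == Z_test.shape[1]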
def sklearn_sol(self, train_matrix, val_matrix, emb_matrix, emb_matrix_te,
                gamma, mapping_dim, seed):
    rbf_feature = RBFSampler(gamma=gamma, n_components=mapping_dim,
                             random_state=seed)
    emb_matrix = rbf_feature.fit_transform(emb_matrix.reshape(-1, 3072))
    rau = 0.0001  # ridge regulariser; could also come from self.arg.rau
    mu = np.mean(emb_matrix, axis=0)
    emb_matrix_1 = emb_matrix  # - mu
    emb_matrix = emb_matrix_1.T
    # closed-form ridge regression on the random features:
    # weights = (Phi Phi^T + rau*I)^-1 Phi Y
    s = np.dot(emb_matrix, emb_matrix.T)
    a, b = s.shape
    s_inv = np.linalg.inv(s + rau * np.identity(a))
    output_mu = np.mean(train_matrix, axis=0)
    output_norm = train_matrix  # - output_mu
    weights = np.dot(np.dot(s_inv, emb_matrix), output_norm)
    # train-set predictions (unused):
    # pred = np.dot(emb_matrix_1, weights)  # + output_mu
    # use transform() on held-out data so the fitted random weights are reused
    emb_matrix_te = rbf_feature.transform(emb_matrix_te.reshape(-1, 3072))
    pred = np.dot(emb_matrix_te, weights)  # + output_mu
    mse_trace = []
    for i in range(len(self.v_data)):
        mse_trace.append(mean_squared_error(val_matrix[i].flatten(), pred[i]))
    return np.mean(mse_trace)
def __init__(self, df, validation_df, rbf_gamma, rbf_ncomponents,
             representative_set_size, key_to_split_on, vals_to_split,
             product_key_to_keep, with_replacement, is_categorical,
             importance_weight_column_name):
    super().__init__(
        df=df,
        key_to_split_on=key_to_split_on,
        vals_to_split=vals_to_split,
        with_replacement=with_replacement,
        is_categorical=is_categorical,
        importance_weight_column_name=importance_weight_column_name)
    self.validation_df = deepcopy(validation_df)
    self.product_key_to_keep = product_key_to_keep
    self.gamma = rbf_gamma
    self.n_components = rbf_ncomponents
    self.representative_set_size = representative_set_size
    rbf_kernel = RBFSampler(gamma=self.gamma, n_components=self.n_components)

    # Get only the features of the datasets (as a list, since pandas no
    # longer accepts a set as a column indexer)
    cols_to_keep = list(set(self.df.columns) -
                        {self.importance_weight_column_name,
                         self.product_key_to_keep})
    tr_dataset_features = pd.get_dummies(self.df[cols_to_keep],
                                         columns=[self.key_to_split_on])
    val_dataset_features = pd.get_dummies(self.validation_df[cols_to_keep],
                                          columns=[self.key_to_split_on])

    # Compute all feature maps using the RBF sampler; transform() keeps the
    # validation set in the same feature space as the training set (this
    # assumes get_dummies yields the same columns for both frames)
    phi_train = rbf_kernel.fit_transform(tr_dataset_features)
    phi_validation = rbf_kernel.transform(val_dataset_features)

    # Pre-computations
    T1 = phi_train @ phi_validation.T @ np.ones(len(self.validation_df))
    T2 = np.array(
        [phi_train[i, :].T @ phi_train[i, :] for i in range(len(self.df))])

    # Greedily select indices for the representative dataset
    best_indices = []
    for i in range(1, self.representative_set_size + 1):
        phi_S = phi_train[best_indices, :]
        T3 = phi_train @ phi_S.T @ np.ones(i - 1) if len(phi_S) > 0 \
            else np.zeros(len(self.df))
        objectives = 2. / (len(self.validation_df) * i) * T1 \
            - 1. / (i**2) * (T2 + 2 * T3)
        objectives[best_indices] = -np.inf
        best_indices.append(np.argmax(objectives))

    # Set our dataset to the selected indices
    self.df = self.df.iloc[sorted(best_indices)].reset_index(drop=True)
class RBFSamplerSGDClassifierEstimator(BaseEstimator, TransformerMixin):
    def __init__(self, gamma=1.0, n_components=100, random_state=None, **kwargs):
        kwargs['random_state'] = random_state
        self.rbf_sampler = RBFSampler(gamma=gamma, n_components=n_components,
                                      random_state=random_state)
        self.sgdclassifier = SGDClassifier(**kwargs)

    def fit(self, X, y):
        self.rbf_sampler.fit(X)
        # train on the same rescaled features that transform()/predict() use,
        # so fitting and prediction are consistent
        self.sgdclassifier.fit(self.transform(X), y)
        return self

    def transform(self, X, y=None):
        # undo sklearn's sqrt(2 / n_components) scaling of the cosine features
        return np.sqrt(self.rbf_sampler.n_components) / np.sqrt(2.) * \
            self.rbf_sampler.transform(X)

    def predict(self, X):
        return self.sgdclassifier.predict(self.transform(X))

    def decision_function(self, X):
        return self.sgdclassifier.decision_function(self.transform(X))
class RBFSamplerSGDRegressorEstimator(BaseEstimator, TransformerMixin):
    def __init__(self, gamma=1.0, n_components=100, random_state=None, **kwargs):
        kwargs['random_state'] = random_state
        self.rbf_sampler = RBFSampler(gamma=gamma, n_components=n_components,
                                      random_state=random_state)
        self.sgdregressor = SGDRegressor(**kwargs)

    def fit(self, X, y):
        self.rbf_sampler.fit(X)
        # train on the same rescaled features that transform()/predict() use
        self.sgdregressor.fit(self.transform(X), y)
        return self

    def transform(self, X, y=None):
        return np.sqrt(self.rbf_sampler.n_components) / np.sqrt(2.) * \
            self.rbf_sampler.transform(X)

    def predict(self, X):
        return self.sgdregressor.predict(self.transform(X))

# TODO: Add kernel SVM
# TODO: Add kernel ridge regressor
# TODO: Add random forests / xgboost
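# --- Note: a minimal usage sketch for the wrapper classes above. It assumes
# the class definitions in this file and their sklearn imports; the toy data
# and the max_iter value are illustrative only, not part of the original code.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 5)
y = (X[:, 0] > 0.5).astype(int)

est = RBFSamplerSGDClassifierEstimator(gamma=0.5, n_components=200,
                                       random_state=0, max_iter=1000)
est.fit(X, y)                  # extra kwargs (max_iter) go to SGDClassifier
print(est.predict(X[:5]))      # predictions use the same rescaled features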
class ExposeDetector(AnomalyDetector):
  """ This detector is an implementation of the EXPoSE (EXPected Similarity
  Estimation) algorithm as described in Markus Schneider, Wolfgang Ertel,
  Fabio Ramos, "Expected Similarity Estimation for Large-Scale Batch and
  Streaming Anomaly Detection", arXiv:1601.06602 (2016).

  EXPoSE calculates the likelihood of a data point being normal by using the
  inner product of its feature map with the kernel embedding of previous data
  points. This measures the similarity of a data point to previous points
  without assuming an underlying data distribution.

  There are three EXPoSE variants: incremental, windowing and decay. This
  implementation is based on EXPoSE with decay. All three variants have been
  tried on NAB, but decay gives the best results. Parameters for this
  detector have been tuned to give the best performance.
  """

  def __init__(self, *args, **kwargs):
    super(ExposeDetector, self).__init__(*args, **kwargs)
    self.kernel = None
    self.previousExposeModel = []
    self.decay = 0.01
    self.timestep = 0

  def initialize(self):
    """Initializes RBFSampler for the detector"""
    self.kernel = RBFSampler(gamma=0.5, n_components=20000, random_state=290)

  def handleRecord(self, inputData):
    """Returns a list [anomalyScore] calculated using a kernel-based
    similarity method described in the comments below"""

    # Transform the input by approximating the feature map of a Radial Basis
    # Function kernel using the Random Kitchen Sinks approximation
    inputFeature = self.kernel.fit_transform(
        numpy.array([[inputData["value"]]]))

    # Compute the expose model as a weighted sum of the new data point's
    # feature map and the previous data points' kernel embedding. The
    # influence of older data points declines with the decay factor.
    if self.timestep == 0:
      exposeModel = inputFeature
    else:
      exposeModel = ((self.decay * inputFeature) +
                     (1 - self.decay) * self.previousExposeModel)

    # Update the previous expose model
    self.previousExposeModel = exposeModel

    # Compute the anomaly score as one minus the similarity of the new data
    # point with the expose model. The similarity measure, calculated via the
    # inner product, is the likelihood of the data point being normal.
    # Resulting anomaly scores are in the range of -0.02 to 1.02.
    # (.item() replaces the deprecated numpy.asscalar)
    anomalyScore = (1 - numpy.inner(inputFeature, exposeModel)).item()
    self.timestep += 1

    return [anomalyScore]
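# --- Note: a standalone sketch of the decay-EXPoSE update described in the
# docstring above, on a toy stream. gamma/n_components are illustrative, not
# the tuned NAB values; refitting each step redraws identical weights because
# random_state is fixed, mirroring the detector.
import numpy as np
from sklearn.kernel_approximation import RBFSampler

kernel = RBFSampler(gamma=0.5, n_components=100, random_state=0)
decay = 0.01
model = None
for value in [0.1, 0.11, 0.09, 5.0]:           # the last point is an outlier
    phi = kernel.fit_transform(np.array([[value]]))
    model = phi if model is None else decay * phi + (1 - decay) * model
    score = (1 - np.inner(phi, model)).item()  # high score = dissimilar
    print(value, round(score, 3))              # outlier scores near 1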
def compute_kernel(self, X, Y=None, *args, **kwargs):
    # initialize RBF kernel
    rff_kernel = RBFSampler(*args, **kwargs)
    # transform data
    return rff_kernel.fit_transform(X)
def NaiveDecomposableGaussianORFF(X, A, gamma=1., D=100, eps=1e-5,
                                  random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite)
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    # Decompose A = B B^T
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    return matrix(kron(phiX, B))
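# --- Note: a quick standalone sanity check (illustrative sizes) that the
# scalar random Fourier features used by the ORFF maps approximate the
# Gaussian kernel: phi(X) @ phi(Y).T converges to exp(-gamma*||x-y||^2) as
# the number of components D grows.
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X, Y = rng.rand(10, 3), rng.rand(8, 3)
phi = RBFSampler(gamma=1.0, n_components=5000, random_state=0)
ZX = phi.fit_transform(X)
ZY = phi.transform(Y)
print(np.max(np.abs(ZX @ ZY.T - rbf_kernel(X, Y, gamma=1.0))))  # small, ~1e-2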
def NaiveCurlFreeGaussianORFF(X, gamma=1., D=100, eps=1e-5, random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)
    phiX = (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) *
            phi_s.random_weights_.reshape((1, -1, phiX.shape[1])))
    return matrix(phiX.reshape((-1, phiX.shape[2])))
def EfficientDivergenceFreeGaussianORFF(X, gamma=1., D=100, eps=1e-5,
                                        random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)
    W = phi_s.random_weights_.reshape((1, -1, 1, phiX.shape[1]))
    Wn = norm(phi_s.random_weights_, axis=0).reshape((1, 1, 1, -1))
    return LinearOperator(
        (phiX.shape[0] * X.shape[1], phiX.shape[1] * X.shape[1]),
        matvec=lambda b: dot(_rebase(phiX, W, Wn), b),
        rmatvec=lambda r: dot(_rebase(phiX, W, Wn).T, r),
        dtype=float)
def NaiveDivergenceFreeGaussianORFF(X, gamma=1., D=100, eps=1e-5,
                                    random_state=0):
    r"""Return the Naive ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : array
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = _rebase(phi_s.fit_transform(X),
                   phi_s.random_weights_.reshape((1, -1, 1, D)),
                   norm(phi_s.random_weights_, axis=0).reshape((1, 1, 1, -1)))
    return matrix(phiX)
def transform(x_original):
    #x_original = x_original.reshape([1, -1])
    #print(x_original.shape)
    rbf_features = RBFSampler(gamma=30, random_state=1, n_components=5300)
    x_trans = rbf_features.fit_transform(x_original)
    #x_trans = x_trans.reshape([-1])
    return x_trans
def kernel_estimation(df_gender):
    X_train, X_test, y_train, y_test = Utils.split_data(df_gender)
    rbf_feature = RBFSampler()
    X_features = rbf_feature.fit_transform(X_train)
    clf = SGDClassifier()
    clf.fit(X_features, y_train)
    # this is kernel approximation (not kernel density estimation), and the
    # score is computed on the training features
    print("Kernel approximation train acc: ", clf.score(X_features, y_train))
def preprocess(X_tr, X_ts, poly_degree=1):
    """
    If the current directory contains RBFSampler.txt, use RBFSampler;
    otherwise do a polynomial transform. Also return the combined transform,
    in case it is needed. The features are already normalized at the source,
    so only the polynomial transformation is done. The default degree is 1,
    since 561 features is already too many.
    """
    rbf_path = os.path.join(os.getcwd(), 'RBFSampler.txt')
    if False and os.path.exists(rbf_path):  # RBFSampler features disabled again; didn't help
        with open(rbf_path, 'rt') as f:
            kwargs = ast.literal_eval(f.read())
        transformer = RBFSampler(**kwargs)
    else:
        transformer = preprocessing.PolynomialFeatures(degree=poly_degree,
                                                       interaction_only=False)
    X_comb_tr = transformer.fit_transform(np.concatenate(X_tr, axis=0))
    X_comb_ts = transformer.transform(np.concatenate(X_ts, axis=0))
    X_tr = [transformer.transform(x) for x in X_tr]
    X_ts = [transformer.transform(x) for x in X_ts]
    return X_tr, X_ts, X_comb_tr, X_comb_ts, transformer
class ClassifierRBF:
    # assumes a `classifier` estimator class is available in the enclosing scope
    def __init__(self, gamma='auto', n_components=100, random_state=None,
                 **kwargs):
        self.gamma = gamma
        self.n_components = n_components
        self.random_state = random_state
        self.clf = classifier(**kwargs)

    def fit(self, X, y):
        if self.gamma == 'auto':
            D = X.shape[1]
            self.gamma = 1 / D
        self.rbf = RBFSampler(gamma=self.gamma,
                              n_components=self.n_components,
                              random_state=self.random_state)
        self.clf.fit(self.rbf.fit_transform(X), y)
        return self

    def predict(self, X):
        p = self.clf.predict(self.rbf.transform(X))
        return p

    def predict_proba(self, X):
        p = self.clf.predict_proba(self.rbf.transform(X))
        return p
def dim_bw_dataset(n_train, n_test, dim, prob_type='regression',
                   embed_size=10000, name=None, preprocess='standardise',
                   bw_set=None, noise_sd=0.5, seed=23):
    n_total = n_train + n_test
    rs = check_random_state(seed)
    X = rs.normal(loc=0.0, scale=1.0, size=(n_total, dim))
    data_x = preprocessing.scale(X)
    signal_x_bw = np.divide(X, np.array(bw_set))

    # 1 / (2 sigma^2) = gamma, i.e. sigma = 1 implies gamma = 0.5
    # sigma = sqrt(1.0 / (2.0 * gamma))
    rbf_feature = RBFSampler(gamma=0.5, n_components=200, random_state=0)
    trans_signal_x_bw = rbf_feature.fit_transform(signal_x_bw)
    alpha = rs.normal(loc=0.0, scale=1.0, size=(200))
    y_0 = np.matmul(trans_signal_x_bw, alpha)

    if prob_type == 'regression':
        y_0 = standardise(y_0, low=0.0, high=1.0)
        y = y_0 + rs.normal(loc=0.0, scale=noise_sd, size=(n_total))
        label = standardise(y)
        dataset = data_split(data_x, label, n_train, n_test, rs)
    elif prob_type == 'classification':
        y_0 = standardise(y_0, low=-6.0, high=6.0)
        y = y_0
        prob = 1.0 / (1.0 + np.exp(-y))
        uni_values = rs.uniform(low=0.0, high=1.0, size=len(prob))
        label = (uni_values > prob).astype(int)
        train_x, test_x, train_y, test_y = train_test_split(
            data_x, label, stratify=label,
            test_size=float(n_test) / n_total, random_state=rs)
        dataset = data(train_x, test_x, train_y, test_y, name=name,
                       embed_size=embed_size, prob_type='classification')
    return dataset
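# --- Note: the gamma/sigma conversion from the comment above, as a tiny
# standalone helper. Assumption: the RBF kernel is written as
# exp(-||x-y||^2 / (2*sigma^2)) = exp(-gamma*||x-y||^2).
import numpy as np

def sigma_to_gamma(sigma):
    return 1.0 / (2.0 * sigma ** 2)

def gamma_to_sigma(gamma):
    return np.sqrt(1.0 / (2.0 * gamma))

assert sigma_to_gamma(1.0) == 0.5  # sigma = 1 implies gamma = 0.5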
def build_model(train_data_path='training-data-small.txt.bz2', scale='small',
                C=1, gamma=0.1, kernel='rbf', chunksize=1024000):
    """Return the trained model for the given training data and parameters
    (optimized C, gamma)"""
    if scale == 'small':
        if 'large' in train_data_path:
            raise ValueError("You can only choose the small dataset in small scale")
        model = SVC(C=C, gamma=gamma, kernel=kernel)
        # load training data
        train_X, train_y = load_data(data_path=train_data_path)
        model.fit(train_X, train_y)
    else:
        if 'small' in train_data_path:
            raise ValueError("You can only choose the large dataset in large scale")
        model = SGDClassifier()
        from sklearn.kernel_approximation import RBFSampler
        # kernel approximation; the fixed random_state makes fit_transform
        # draw the same random weights for every chunk
        rbf_feature = RBFSampler(gamma=gamma, random_state=1, n_components=1000)
        # incremental learning with SGDClassifier
        with bz2.open(train_data_path, 'r') as f:
            for chunk in chunk_file(f):
                print(time.time())
                train_y, X = parse_lines(chunk)
                train_X = feature_hash(X)
                train_X_rbf = rbf_feature.fit_transform(train_X)
                model.partial_fit(train_X_rbf, train_y,
                                  classes=np.array([0, 1]))
    return model
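# --- Note: a minimal sketch of the streaming pattern above, with dummy
# chunks (the data and parameters are illustrative). Fitting the sampler once
# and calling transform() per chunk avoids relying on the fixed random_state
# to keep the feature map identical across chunks.
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
chunks = [(rng.rand(64, 20), rng.randint(0, 2, 64)) for _ in range(5)]

sampler = RBFSampler(gamma=0.1, n_components=1000, random_state=1)
sampler.fit(chunks[0][0])                 # draw the random weights once
model = SGDClassifier()
for X_chunk, y_chunk in chunks:
    model.partial_fit(sampler.transform(X_chunk), y_chunk,
                      classes=np.array([0, 1]))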
def computePolyFeatures(Feature):
    # despite the name, this maps the features through an RBF sampler
    rbf_feature = RBFSampler(gamma=0.01, n_components=50, random_state=1)
    PolyFeatures = rbf_feature.fit_transform(Feature)
    # PolyFeatures = np.copy(Feature)
    return PolyFeatures
def kernel_approximation():
    X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    y = [0, 0, 1, 1]
    rbf_feature = RBFSampler()
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier()
    clf.fit(X_features, y)
    print(clf.score(X_features, y))
class MaxvalueEntropySearch(object):
    def __init__(self, GPmodel):
        self.GPmodel = GPmodel
        self.y_max = max(GPmodel.yValues)
        self.d = GPmodel.dim

    def Sampling_RFM(self):
        # Bayesian linear regression on random Fourier features drawn to
        # match the GP's RBF kernel length-scale
        #self.rbf_features = RBFSampler(gamma=1/(2*RBF(length_scale=1, length_scale_bounds=(1e-3, 1e2)).length_scale**2), n_components=1000, random_state=1)
        self.rbf_features = RBFSampler(
            gamma=1 / (2 * self.GPmodel.kernel.length_scale**2),
            n_components=1000, random_state=1)
        X_train_features = self.rbf_features.fit_transform(
            np.asarray(self.GPmodel.xValues))
        A_inv = np.linalg.inv(
            (X_train_features.T).dot(X_train_features) +
            np.eye(self.rbf_features.n_components) / self.GPmodel.beta)
        self.weights_mu = A_inv.dot(X_train_features.T).dot(
            self.GPmodel.yValues)
        weights_gamma = A_inv / self.GPmodel.beta
        self.L = np.linalg.cholesky(weights_gamma)

    def weigh_sampling(self):
        random_normal_sample = np.random.normal(0, 1, np.size(self.weights_mu))
        self.sampled_weights = np.c_[self.weights_mu] + self.L.dot(
            np.c_[random_normal_sample])

    def f_regression(self, x):
        # fit_transform redraws the same weights here because random_state is fixed
        X_features = self.rbf_features.fit_transform(x.reshape(1, len(x)))
        return -(X_features.dot(self.sampled_weights))

    def single_acq(self, x, maximum):
        mean, std = self.GPmodel.getPrediction(x)
        mean = mean[0]
        std = std[0]
        if maximum < max(self.GPmodel.yValues) + 5 / self.GPmodel.beta:
            maximum = max(self.GPmodel.yValues) + 5 / self.GPmodel.beta
        normalized_max = (maximum - mean) / std
        pdf = norm.pdf(normalized_max)
        cdf = norm.cdf(normalized_max)
        if cdf == 0:
            cdf = 1e-30
        return -(normalized_max * pdf) / (2 * cdf) + np.log(cdf)
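# --- Note: the Bayesian linear model behind Sampling_RFM/weigh_sampling
# above, as a compact standalone sketch. Toy data; `beta` plays the role of a
# noise precision, and all values are illustrative.
import numpy as np
from sklearn.kernel_approximation import RBFSampler

rng = np.random.RandomState(0)
X, y = rng.rand(30, 2), rng.rand(30)
beta = 100.0

phi = RBFSampler(gamma=1.0, n_components=50, random_state=1)
Phi = phi.fit_transform(X)
A_inv = np.linalg.inv(Phi.T @ Phi + np.eye(50) / beta)
w_mean = A_inv @ Phi.T @ y                   # posterior mean of the weights
L = np.linalg.cholesky(A_inv / beta)         # posterior covariance factor
w_sample = w_mean + L @ rng.normal(size=50)  # one Thompson sample
f = lambda x: phi.transform(x.reshape(1, -1)) @ w_sample  # sampled function
print(f(np.array([0.5, 0.5])))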
def test_sgd_regressor_rbf(loss):
    rng = np.random.RandomState(0)
    transform = RBFSampler(n_components=100, gamma=10, random_state=0)
    X_trans = transform.fit_transform(X)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    _test_regressor(transform, y_train, y_test, X_trans, loss=loss)
class CondexposeDetector(AnomalyDetector):
  """ This is a modified EXPoSE detector that integrates a conditional
  temporal relation between two consecutive inputs.
  """

  def __init__(self, *args, **kwargs):
    super(CondexposeDetector, self).__init__(*args, **kwargs)
    self.kernel = None
    self.timestep = 0

  def initialize(self, gamma=None, fourierFeatures=None):
    """Initializes RBFSampler for the detector"""
    self.gamma = 0.1 if gamma is None else gamma
    self.fourierFeatures = 50 if fourierFeatures is None else fourierFeatures
    print('parameters -- gamma={} fourierFeatures={}'.format(
        self.gamma, self.fourierFeatures))
    self.kernel = RBFSampler(gamma=self.gamma,
                             n_components=self.fourierFeatures,
                             random_state=5)
    self.r = VRLS4(self.fourierFeatures)
    self.x_t = None

  def handleRecord(self, inputData):
    """Returns a list [anomalyScore] calculated using a kernel-based
    similarity method described in the comments below"""

    # Transform the input by approximating the feature map of a Radial Basis
    # Function kernel using the Random Kitchen Sinks approximation
    inputData = [inputData['v_{}'.format(i)] for i in range(len(inputData) - 1)]
    # scaling step; todo: take outside and normalize all columns on their own
    inputData = (inputData - self.inputMin) / (self.inputMax - self.inputMin)
    assert len(self.inputMin) == len(inputData), 'normalization error, len diff'
    y_t = self.kernel.fit_transform(np.asarray([inputData]))
    if self.timestep == 0:
      self.x_t = y_t.copy()
    conditional_mean = np.matmul(self.x_t, self.r.getCovar())
    if self.timestep > 1:  # was `i > 1`, but `i` is undefined at this point
      conditional_mean = conditional_mean / LA.norm(conditional_mean)
    # (.item() replaces the deprecated np.asscalar)
    anomalyScore = (1 - np.inner(y_t, conditional_mean)).item()
    self.r.update(self.x_t.T, y_t.T)
    self.x_t = y_t.copy()
    self.timestep += 1
    return [anomalyScore]
def kernel_transform(self, X1, X2=None, kernel_type='linear_primal',
                     n_components=100, gamma=1.0):
    """
    Forms the kernel matrix using the samples X1

    Parameters
    ----------
    X1: np.ndarray
        data (n_samples1, n_features) to form a kernel of shape
        (n_samples1, n_samples1)
    X2: np.ndarray
        data (n_samples2, n_features) to form a kernel of shape
        (n_samples1, n_samples2)
    kernel_type: str
        type of kernel to be used
    n_components: int
        number of components for the primal feature maps
    gamma: float
        kernel parameter

    Returns
    -------
    X: np.ndarray
        the kernel of shape (n_samples, n_samples), or a primal feature map
        for the *_primal types
    """
    if kernel_type == 'linear':
        X = linear_kernel(X1, X2)
    elif kernel_type == 'rbf':
        X = rbf_kernel(X1, X2, gamma)
    elif kernel_type == 'tanh':
        X = sigmoid_kernel(X1, X2, -gamma)
    elif kernel_type == 'sin':
        # X = np.sin(gamma * manhattan_distances(X1, X2))
        X = np.sin(gamma * pairwise_distances(X1, X2) ** 2)
    elif kernel_type == 'TL1':
        X = np.maximum(0, gamma - manhattan_distances(X1, X2))
    elif kernel_type == 'rff_primal':
        rbf_feature = RBFSampler(gamma=gamma, random_state=1,
                                 n_components=n_components)
        X = rbf_feature.fit_transform(X1)
    elif kernel_type == 'nystrom_primal':
        # cannot have n_components more than n_samples1
        if n_components > X1.shape[0]:
            raise ValueError('n_components cannot exceed the number of samples in X1')
        rbf_feature = Nystroem(gamma=gamma, random_state=1,
                               n_components=n_components)
        X = rbf_feature.fit_transform(X1)
    elif kernel_type == 'linear_primal':
        X = X1
    else:
        print('No valid kernel_type passed: using the linear primal solver')
        X = X1
    return X
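# --- Note: a small comparison (illustrative data) of the two primal feature
# maps offered above. RBFSampler draws data-independent random features,
# while Nystroem builds a data-dependent map and is typically more accurate
# for the same n_components.
import numpy as np
from sklearn.kernel_approximation import RBFSampler, Nystroem
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.rand(100, 5)
K = rbf_kernel(X, gamma=1.0)
for Mapper in (RBFSampler, Nystroem):
    Z = Mapper(gamma=1.0, n_components=50, random_state=1).fit_transform(X)
    print(Mapper.__name__, np.abs(Z @ Z.T - K).mean())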
def __init__(self, X, y, dataset, policy_name, scale=True, n_splits=10,
             passive=True, n_jobs=-1, overwrite=False, gamma_percentile=90,
             ts_sigma=0.02, ts_tau=0.02, ts_mu=0.5, save_name=None,
             candidate_pool_size=None):
    seed = RandomState(1234)
    self.X = np.asarray(X, dtype=np.float64)
    self.y = np.asarray(y)
    self.X = StandardScaler().fit_transform(self.X) if scale else self.X
    self.policy_name = policy_name
    self.dataset = dataset
    self.passive = passive
    self.n_jobs = n_jobs
    self.overwrite = overwrite
    self.ts_sigma = ts_sigma
    self.ts_tau = ts_tau
    self.ts_mu = ts_mu
    self.save_name = save_name
    self.candidate_pool_size = candidate_pool_size

    # estimate the kernel using the 90th percentile heuristic
    random_idx = seed.choice(X.shape[0], 1000)
    distances = pairwise_distances(self.X[random_idx], metric='l1')
    self.gamma = 1 / np.percentile(distances, 90)
    self.similarity_gamma = 1 / np.percentile(distances, gamma_percentile)
    transformer = RBFSampler(gamma=self.gamma, random_state=seed,
                             n_components=100)
    self.X_transformed = transformer.fit_transform(self.X)

    n_samples = self.X.shape[0]
    train_size = min(10000, int(0.7 * n_samples))
    test_size = min(20000, n_samples - train_size)
    splitter = StratifiedShuffleSplit(n_splits=n_splits,
                                      train_size=train_size,
                                      test_size=test_size,
                                      random_state=seed)
    self.kfold = list(splitter.split(self.X, self.y))
    self.label_encoder = LabelEncoder()
    self.label_encoder.fit(y)
    if policy_name == 'COMB':
        assert len(self.label_encoder.classes_) == 2, \
            'COMB only works with binary classification.'
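# --- Note: the bandwidth heuristic used above, shown in isolation on
# illustrative data: subsample the rows, compute pairwise L1 distances, and
# set gamma to the reciprocal of a chosen percentile of those distances.
import numpy as np
from sklearn.metrics import pairwise_distances

rng = np.random.RandomState(1234)
X = rng.rand(5000, 10)
idx = rng.choice(X.shape[0], 1000)
gamma = 1.0 / np.percentile(pairwise_distances(X[idx], metric='l1'), 90)
print(gamma)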
def test_regressor_rbf(normalize, loss):
    rng = np.random.RandomState(0)
    # approximate kernel mapping
    transformer = RBFSampler(n_components=100, random_state=0, gamma=10)
    X_trans = transformer.fit_transform(X)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    _test_regressor(transformer, X_train, y_train, X_test, y_test, X_trans,
                    normalize=normalize, loss=loss)
def example():
    from sklearn.kernel_approximation import RBFSampler
    from sklearn.linear_model import SGDClassifier
    X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    y = [0, 0, 1, 1]
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier(max_iter=5)
    print(clf.fit(X_features, y))
    print(clf.score(X_features, y))
def approx_kernel(kernel_structure, data_x, data_y):
    #print("Approx kernel")
    if kernel_structure.iloc[0].loc['kernel_type'] == 'RBF':
        rbf_feature = RBFSampler(gamma=1, n_components=10, random_state=1)
        X_features = rbf_feature.fit_transform(data_x)
    elif kernel_structure.iloc[0].loc['kernel_type'] == 'ACHI2':
        chi2sampler = AdditiveChi2Sampler(sample_steps=10, sample_interval=1)
        # was fit_transform(X, y): the arguments here are data_x / data_y
        X_features = chi2sampler.fit_transform(data_x, data_y)
    # todo: implement the other methods
    return X_features
def ridge_gamma(data, log_gamma):
    alpha = 5.0e-07  # ~ sigma^2 / n (previously 2.0e-06, 6.25e-07)
    gamma = np.exp(log_gamma)
    print('Training with alpha:{}, gamma:{}'.format(alpha, gamma))
    np.random.seed(23)
    rbf_feature = RBFSampler(gamma=gamma, n_components=200)
    trans_tr_x = rbf_feature.fit_transform(data.train_x)
    trans_test_x = rbf_feature.transform(data.test_x)
    clf = Ridge(alpha=alpha)
    clf.fit(trans_tr_x, data.train_y)
    score = clf.score(trans_test_x, data.test_y)
    return max(score, -1.0)
def runSVM(pickle_file, X_test_svm):
    full_name = glob("./pickles/" + pickle_file + "/best*")[0]
    with open(full_name, "rb") as f:
        model = pickle.load(f)
    if "rbf" in pickle_file:
        number = int(re.sub(".pickle", "",
                            re.sub(r".*best_model_", "", full_name)))
        df = pd.read_csv(glob("./pickles/" + pickle_file + "/log*")[0])
        g = float(df[df["model"] == number]["gamma"])
        n = int(df[df["model"] == number]["n_components"])
        # caveat: without the random_state used at training time, this
        # sampler draws fresh weights, so the features may not match the
        # space the pickled model was trained in
        rbf_feature = RBFSampler(gamma=g, n_components=n)
        X_test_svm = rbf_feature.fit_transform(X_test_svm)
    out = model.decision_function(X_test_svm)
    np.save("./pickles/" + pickle_file + "/prob_map_test.npy", out)
def rbf_projection_idea_main():
    # Classic example of using an RBF kernel to raise the dimensionality of
    # the data (similar to an SVM); taken from the sklearn docs
    from sklearn.linear_model import SGDClassifier
    X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    y = [0, 0, 1, 1]
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier(max_iter=5, tol=1e-3)
    clf.fit(X_features, y)  # returns SGDClassifier(max_iter=5)
    print('Score:', clf.score(X_features, y))
def test_rbf_sampler(): """test that RBFSampler approximates kernel on random data""" # compute exact kernel gamma = 10. kernel = rbf_kernel(X, Y, gamma=gamma) # approximate kernel mapping rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42) X_trans = rbf_transform.fit_transform(X) Y_trans = rbf_transform.transform(Y) kernel_approx = np.dot(X_trans, Y_trans.T) assert_array_almost_equal(kernel, kernel_approx, 1)
def test_sgd_classifier_rbf(loss):
    rng = np.random.RandomState(0)
    transform = RBFSampler(n_components=100, gamma=10, random_state=0)
    X_trans = transform.fit_transform(X)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    _test_classifier(transform, np.sign(y_train), np.sign(y_test), X_trans,
                     max_iter=500, eta0=.01, loss=loss)
def test_rbf_sampler():
    # test that RBFSampler approximates kernel on random data
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert_less_equal(np.abs(np.mean(error)), 0.01)  # close to unbiased
    np.abs(error, out=error)
    assert_less_equal(np.max(error), 0.1)   # nothing too far off
    assert_less_equal(np.mean(error), 0.05)  # mean is fairly close
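# --- Note: an illustrative extension of the test above (not part of the
# original suite): the Monte Carlo approximation error of the random feature
# map should shrink roughly like 1/sqrt(n_components).
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.rand(50, 4)
K = rbf_kernel(X, gamma=10.)
for D in (100, 1000, 10000):
    Z = RBFSampler(gamma=10., n_components=D, random_state=42).fit_transform(X)
    print(D, np.max(np.abs(Z @ Z.T - K)))  # max error drops as D grows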
def trainSGD(self):
    sgd = SGDClassifier(
        loss=self.loss,
        penalty=self.reg,
        alpha=self.alpha,
        n_iter=self.epochs,    # max_iter in recent sklearn
        shuffle=True,
        n_jobs=self.multicpu,
        class_weight="auto",   # "balanced" in recent sklearn
    )
    # print("Classifier (sklearn SGD): training the model \t(%s)" % self.dspath)
    if self.kernel_approx is True:
        # n_components must be an integer (was 100.0)
        rbf_feature = RBFSampler(gamma=1, n_components=100, random_state=1)
        Xk = rbf_feature.fit_transform(self.X)
        self.glm = OneVsRestClassifier(sgd).fit(Xk, self.Y)
    else:
        self.glm = OneVsRestClassifier(sgd).fit(self.X, self.Y)
    print("Classifier (sklearn SGD): Done. \t(%s)" % self.dspath)
def train_models(X_train, y_train, X_test, y_test):
    clf = linear_model.SGDClassifier(penalty='elasticnet')
    print(clf)
    print("fitting a linear elasticnet (L1+L2 regularized linear classif.) with SGD")
    clf = clf.fit(X_train, y_train)
    print("score on the training set", clf.score(X_train, y_train))
    print("score on 80/20 split", clf.score(X_test, y_test))

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_train_feats = rbf_feature.fit_transform(X_train)
    X_test_feats = rbf_feature.transform(X_test)
    print("fitting a linear elasticnet with SGD on RBF sampled features")
    clf = clf.fit(X_train_feats, y_train)
    print("score on the training set", clf.score(X_train_feats, y_train))
    print("score on 80/20 split", clf.score(X_test_feats, y_test))

    clf2 = RandomForestClassifier(max_depth=None, min_samples_split=3)
    print(clf2)
    print("fitting a random forest")
    clf2 = clf2.fit(X_train, y_train)
    print("score on the training set", clf2.score(X_train, y_train))
    print("score on 80/20 split", clf2.score(X_test, y_test))

    clf3 = svm.SVC(kernel='linear')
    print(clf3)
    print("fitting an SVM with a linear kernel")
    clf3 = clf3.fit(X_train, y_train)
    print("score on the training set", clf3.score(X_train, y_train))
    print("score on 80/20 split", clf3.score(X_test, y_test))

    clf4 = svm.SVC(kernel='rbf')
    print(clf4)
    print("fitting an SVM with an RBF-kernel")
    clf4 = clf4.fit(X_train, y_train)
    print("score on the training set", clf4.score(X_train, y_train))
    print("score on 80/20 split", clf4.score(X_test, y_test))

    clf5 = linear_model.LogisticRegression(penalty='l1', tol=0.01)
    print(clf5)
    print("fitting a logistic regression reg. with L1")
    clf5 = clf5.fit(X_train, y_train)
    print("score on the training set", clf5.score(X_train, y_train))
    print("score on 80/20 split", clf5.score(X_test, y_test))
def __init__(self, X, y, dataset, policy_name, scale=True, n_iter=10,
             passive=True):
    seed = RandomState(1234)
    self.X = np.asarray(X, dtype=np.float64)
    self.y = np.asarray(y)
    self.X = StandardScaler().fit_transform(self.X) if scale else self.X
    self.policy_name = policy_name
    self.dataset = dataset
    self.passive = passive

    # estimate the kernel using the 90th percentile heuristic
    random_idx = seed.choice(X.shape[0], 1000)
    distances = pairwise_distances(self.X[random_idx], metric='l1')
    self.gamma = 1 / np.percentile(distances, 90)
    transformer = RBFSampler(gamma=self.gamma, random_state=seed,
                             n_components=100)
    self.X_transformed = transformer.fit_transform(self.X)

    n_samples = self.X.shape[0]
    train_size = min(10000, int(0.7 * n_samples))
    test_size = min(20000, n_samples - train_size)
    # old sklearn cross_validation API (labels passed to the constructor)
    self.kfold = StratifiedShuffleSplit(self.y, n_iter=n_iter,
                                        test_size=test_size,
                                        train_size=train_size,
                                        random_state=seed)
def EfficientDecomposableGaussianORFF(X, A, gamma=1., D=100, eps=1e-5,
                                      random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    A : {array-like}, shape = [n_targets, n_targets]
        Operator of the Decomposable kernel (positive semi-definite)
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    # Decompose A = B B^T
    u, s, v = svd(A, full_matrices=False, compute_uv=True)
    B = dot(diag(sqrt(s[s > eps])), v[s > eps, :])

    # Sample a RFF from the scalar Gaussian kernel
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)

    # Create the ORFF linear operator
    cshape = (D, B.shape[0])
    rshape = (X.shape[0], B.shape[1])
    return LinearOperator(
        (phiX.shape[0] * B.shape[1], D * B.shape[0]),
        matvec=lambda b: dot(phiX, dot(b.reshape(cshape), B)),
        rmatvec=lambda r: dot(phiX.T, dot(r.reshape(rshape), B.T)),
        dtype=float)
def EfficientCurlFreeGaussianORFF(X, gamma=1., D=100, eps=1e-5,
                                  random_state=0):
    r"""Return the Efficient ORFF map associated with the data X.

    Parameters
    ----------
    X : {array-like}, shape = [n_samples, n_features]
        Samples.
    gamma : {float},
        Gamma parameter of the RBF kernel.
    D : {integer},
        Number of random features.
    eps : {float},
        Cutoff threshold for the singular values of A.
    random_state : {integer},
        Seed of the generator.

    Returns
    -------
    \tilde{\Phi}(X) : Linear Operator, callable
    """
    phi_s = RBFSampler(gamma=gamma, n_components=D, random_state=random_state)
    phiX = phi_s.fit_transform(X)
    W = phi_s.random_weights_.reshape((1, -1, phiX.shape[1]))
    return LinearOperator(
        (phiX.shape[0] * X.shape[1], phiX.shape[1]),
        matvec=lambda b: dot(
            phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) * W, b),
        rmatvec=lambda r: dot(
            (phiX.reshape((phiX.shape[0], 1, phiX.shape[1])) * W)
            .reshape(phiX.shape[0] * X.shape[1], phiX.shape[1]).T, r),
        dtype=float)
def rbf_kernel(self, matrix, n_components):
    rbf = RBFSampler(n_components=n_components)
    print(rbf)
    matrix_features = rbf.fit_transform(matrix)
    return matrix_features
#prop['class_' + ps] = []
# restore classifier set from file
classifier = joblib.load('data/' + algorithm + '-' + ps + '.pkl')
# restore robust scaler from file
robust_scaler = joblib.load('data/rs-' + algorithm + '-' + ps + '.pkl')
# restore classes from file
classes = joblib.load('data/classes-' + algorithm + '-' + ps + '.pkl')
cstatus = robust_scaler.transform(cstatus_orig)
if algorithm == 'kernel-approx':
    # refitting here only matches the training-time feature space because the
    # same random_state=1 was used when the classifier was trained
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    cstatus = rbf_feature.fit_transform(cstatus)
prob = None
if algorithm == 'one-vs-rest' or algorithm == 'linear-svm':
    f = np.vectorize(platt_func)
    raw_predictions = classifier.decision_function(cstatus)
    platt_predictions = f(raw_predictions)
    prob = platt_predictions / platt_predictions.sum(axis=1)
    #prob = prob.tolist()
else:
    prob = classifier.predict_proba(cstatus).tolist()
for i in range(0, len(classes)):
def main(): type_of_problem = "" split = 0.3 su_train = [] su_test = [] p = optparse.OptionParser() # take path of training data set p.add_option("--path_train", "-p", default="/afs/cern.ch/user/s/sganju/private/2014_target.csv") # what type of problem is it? regression/classification/clustering/dimensionality reduction p.add_option("--type_of_problem", "-t", default="c") # include cross validation true/false p.add_option("--cross_validation", "-v", default="True") # take the numerical values # p.add_option('--numerical_values', '-n') # specify target column p.add_option("--target", "-y") options, arguments = p.parse_args() num_values = "id cpu creator dataset dbs dtype era naccess nblk nevt nfiles nlumis nrel nsites nusers parent primds proc_evts procds rel1_0 rel1_1 rel1_2 rel1_3 rel1_4 rel1_5 rel1_6 rel1_7 rel2_0 rel2_1 rel2_10 rel2_11 rel2_2 rel2_3 rel2_4 rel2_5 rel2_6 rel2_7 rel2_8 rel2_9 rel3_0 rel3_1 rel3_10 rel3_11 rel3_12 rel3_13 rel3_14 rel3_15 rel3_16 rel3_17 rel3_18 rel3_19 rel3_2 rel3_20 rel3_21 rel3_22 rel3_23 rel3_24 rel3_25 rel3_26 rel3_3 rel3_4 rel3_5 rel3_6 rel3_7 rel3_8 rel3_9 relt_0 relt_1 relt_2 rnaccess rnusers rtotcpu s_0 s_1 s_2 s_3 s_4size tier totcpu wct" num_values = num_values.split() # load from files train = pd.read_csv(options.path_train) # load target values target = train["target"] # TRAINING DATA SET data = train print "Performing imputation." imp = data.dropna().mean() test = data.fillna(imp) data = data.fillna(imp) print "Splitting the training data with %f." % split features_train, features_test, target_train, target_test = train_test_split( data, target, test_size=split, random_state=0 ) print "Generating Model" # diffrentiate on the basis of type of problem # RANDOM FOREST CLASSIFIER rf = RandomForestClassifier(n_estimators=100) rf = rf.fit(features_train, target_train) cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test) # Ada boost clf_ada = AdaBoostClassifier(n_estimators=100) params = { "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5], "max_features": [0.25, 0.50, 0.75, 1], "max_depth": [3, 4, 5], } gs = GridSearchCV(clf_ada, params, cv=5, scoring="accuracy", n_jobs=4) clf_ada.fit(features_train, target_train) cal_score("ADABOOST", clf_ada, features_test, target_test) # RANDOM FOREST CLASSIFIER rf = RandomForestClassifier(n_estimators=100) rf = rf.fit(features_train, target_train) cal_score("RANDOM FOREST CLASSIFIER", rf, features_test, target_test) # predictions = rf.predict_proba(test) # Gradient Boosting gb = GradientBoostingClassifier(n_estimators=100, subsample=0.8) params = { "learning_rate": [0.05, 0.1, 0.2, 0.3, 2, 3, 5], "max_features": [0.25, 0.50, 0.75, 1], "max_depth": [3, 4, 5], } gs = GridSearchCV(gb, params, cv=5, scoring="accuracy", n_jobs=4) gs.fit(features_train, target_train) cal_score("GRADIENT BOOSTING", gs, features_test, target_test) rbf_feature = RBFSampler(gamma=1, random_state=1) X_features = rbf_feature.fit_transform(data) # SGD CLASSIFIER clf = SGDClassifier( alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0, fit_intercept=True, l1_ratio=0.15, learning_rate="optimal", loss="hinge", n_iter=5, n_jobs=1, penalty="l2", power_t=0.5, random_state=None, shuffle=True, verbose=0, warm_start=False, ) clf.fit(features_train, target_train) cal_score("SGD Regression", clf, features_test, target_test) # KN Classifier neigh = KNeighborsClassifier(n_neighbors=1) neigh.fit(features_train, target_train) cal_score("KN CLASSIFICATION", neigh, features_test, target_test) # predictions = neigh.predict_proba(test) # 
Decision Tree classifier clf_tree = tree.DecisionTreeClassifier(max_depth=10) clf_tree.fit(features_train, target_train) cal_score("DECISION TREE CLASSIFIER", clf_tree, features_test, target_test)
f.write(header)
size = header.count(',')
for (id, label) in zip(ids, labels):
    f.write('%d' % int(id))
    for i in range(0, size):
        if i == label:
            f.write(',1')
        else:
            f.write(',0')
    f.write('\n')


if __name__ == '__main__':
    # get X and y
    train_x, train_y = loadDataHelper('train_data.txt')
    test_x, test_id = loadDataHelper('test_data.txt')
    print('train size: %d %d' % (len(train_x), len(train_y)))
    print('test size: %d %d' % (len(test_x), len(test_id)))

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(train_x)
    model = SGDClassifier(loss="hinge", alpha=0.01, n_iter=200,
                          fit_intercept=True)
    # model = SGDClassifier()
    model.fit(X_features, train_y)
    print(model)

    # transform (not fit_transform) keeps the test features in the same
    # random feature space the model was trained in
    X_features = rbf_feature.transform(test_x)
    predicted = model.predict(X_features)
    saveResult('result-sgd.csv', test_id, predicted)
X_test = X[0:nr_test]
Y_test = Y[0:nr_test]
X_train = X[nr_test+1:len(X)]
Y_train = Y[nr_test+1:len(X)]
X_train = robust_scaler.fit_transform(X_train)
# save standard scaler
joblib.dump(robust_scaler,
            base_path + 'data/rs-' + algorithm + '-' + str(ps[psi]) + '.pkl')
X_test = robust_scaler.transform(X_test)
if algorithm == 'kernel-approx':
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_train = rbf_feature.fit_transform(X_train)
    # transform() reuses the weights fitted on the training set
    X_test = rbf_feature.transform(X_test)
elif algorithm == 'mlp':
    n_output = len(set(Y))
    #n_output = 2460
    n_input = len(X_train[0]) + 1
    n_neurons = int(round(sqrt(n_input * n_output)))
    print("N input", n_input)
    print("N output", n_output)
    print("N neurons", n_neurons)
    classifier = MLPClassifier(solver='adam', alpha=1e-5,
                               hidden_layer_sizes=(n_input, n_neurons, n_output),
                               random_state=1)
if classifier is not None or exists_be_file is True:
    if cv is True:
        gs = GridSearchCV(classifier, parameters)
    'max_features': [.25, .50, .75, 1],
    'max_depth': [3, 4, 5],
}
gs = GridSearchCV(gb, params, cv=5, scoring='accuracy', n_jobs=4)
gs.fit(features_train, target_train)
#predictions = gs.predict_proba(test)
#print(predictions)
cal_score("GRADIENT BOOSTING", gs, features_test, target_test)
#sorted(gs.grid_scores_, key=lambda x: x.mean_validation_score)
#print(gs.best_score_)
#print(gs.best_params_)
#predictions = gs.predict_proba(test)

#KERNEL APPROXIMATIONS - RBF
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(data)

#SGD CLASSIFIER
clf = SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
                    fit_intercept=True, l1_ratio=0.15, learning_rate='optimal',
                    loss='hinge', n_iter=5, n_jobs=1, penalty='l2',
                    power_t=0.5, random_state=None, shuffle=True, verbose=0,
                    warm_start=False)
clf.fit(features_train, target_train)
cal_score("SGD Regression", clf, features_test, target_test)

#KN Classifier
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(features_train, target_train)
cal_score("KN CLASSIFICATION", neigh, features_test, target_test)
classa = [0, 1, 2, 3, 4]
num = len(x) - 10000
xtest, ytest = x[num:], y[num:]
x, y = x[:num], y[:num]
print(x[:10], y[:10])
clf = clf.fit(x, y)
clf2_RFC = RandomForestClassifier(random_state=0,
                                  class_weight={1: 0.25, 2: 0.56,
                                                3: 0.17, 4: 0.02})
clf2_RFC = clf2_RFC.fit(x, y)

# RBF features are computed here, but the SGD classifier below is fitted on
# the raw inputs
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(x)
X_test = rbf_feature.transform(xtest)
clfK = linear_model.SGDClassifier()
clfK.fit(x, y)
print("SGD classifier", clfK.score(xtest, ytest))

#DECISION TREE
clft = tree.DecisionTreeClassifier(max_depth=7)
clft.fit(x, y)
print("Tree", clft.score(xtest, ytest))

#gen image
fname = ["stack size: ", "num called: ", "num to call: ", "raise", "bet: ",
         "hand eval: ", "card info: ", "potsize: "]
def transform(x_original, make_np=True): orig = x_original MEAN = [ 0.00213536, 0.00324656, 0.00334724, 0.00175428, 0.00349227, 0.0035413 , 0.00188289, 0.00216241, 0.00184026, 0.00351317, 0.00520942, 0.00450718, 0.00346782, 0.00300477, 0.00223811, 0.00180039, 0.00216675, 0.00381716, 0.00258565, 0.00291358, 0.00616643, 0.00237084, 0.00440006, 0.00729192, 0.00369302, 0.00058215, 0.00312047, 0.00629086, 0.00184585, 0.0018266 , 0.00329771, 0.00352135, 0.00246634, 0.00261958, 0.00357113, 0.00307333, 0.00211512, 0.00125184, 0.00212255, 0.00307451, 0.00171408, 0.0126576 , 0.00252346, 0.00528872, 0.0026387 , 0.00283739, 0.00394586, 0.00207473, 0.00307515, 0.002017 , 0.00408066, 0.00185709, 0.00316201, 0.00349098, 0.00415104, 0.00348125, 0.00069981, 0.00128145, 0.0023404 , 0.00396659, 0.00240324, 0.01251434, 0.00125352, 0.00266113, 0.00435828, 0.00066137, 0.00221134, 0.00083185, 0.00278664, 0.00118505, 0.00335414, 0.00340527, 0.0026939 , 0.00096786, 0.00214149, 0.0026521 , 0.00155538, 0.00300255, 0.0040405 , 0.00275396, 0.00077404, 0.00257667, 0.00268743, 0.00279948, 0.0018655 , 0.00239569, 0.0032419 , 0.00288355, 0.00123361, 0.00220135, 0.0021836 , 0.00225123, 0.00366629, 0.00279189, 0.00058814, 0.00310452, 0.00276981, 0.00128716, 0.00074161, 0.00358908, 0.003292 , 0.00233592, 0.00317694, 0.00381526, 0.00269197, 0.00098085, 0.00231831, 0.00133682, 0.00460957, 0.00387842, 0.0004473 , 0.0015644 , 0.00247717, 0.00179484, 0.00281831, 0.00053689, 0.00415889, 0.00232736, 0.00361601, 0.00192624, 0.00224487, 0.00210838, 0.00140079, 0.00608319, 0.00211861, 0.00230604, 0.00124033, 0.0029389 , 0.00227564, 0.00086638, 0.0035496 , 0.00228789, 0.00361703, 0.00270277, 0.00196611, 0.00206865, 0.00146788, 0.00019011, 0.00222272, 0.00351472, 0.00305718, 0.00239471, 0.00040766, 0.00299186, 0.00368983, 0.00244158, 0.00084154, 0.00109796, 0.00278565, 0.00135904, 0.00424855, 0.00323784, 0.00255397, 0.00234946, 0.00210558, 0.00291688, 0.00172516, 0.00284473, 0.00308164, 0.00316225, 0.0041659 , 0.00055891, 0.00303591, 0.00028217, 0.00261526, 0.00196658, 0.00264379, 0.00018002, 0.00227361, 0.00190785, 0.00344782, 0.00305479, 0.00057851, 0.00115452, 0.00365707, 0.0009598 , 0.00184313, 0.00286183, 0.00400594, 0.0003848 , 0.00086102, 0.00277779, 0.00214625, 0.00329827, 0.00129511, 0.00114751, 0.00249452, 0.00236266, 0.00353646, 0.00319208, 0.00540883, 0.00323167, 0.00299791, 0.00025745, 0.00227873, 0.00228826, 0.0040653 , 0.00238598, 0.00483883, 0.00054585, 0.00091663, 0.00037232, 0.0008229 , 0.00073563, 0.00283771, 0.0035899 , 0.00578833, 0.0032107 , 0.0014048 , 0.00401052, 0.002748 , 0.00229416, 0.00130351, 0.00308403, 0.00146506, 0.00188529, 0.00236308, 0.00259649, 0.00185155, 0.00230195, 0.00421584, 0.00231917, 0.00227335, 0.00296253, 0.00077996, 0.0001668 , 0.00069015, 0.00220702, 0.00238395, 0.00034903, 0.00303323, 0.00407338, 0.00178655, 0.00456887, 0.00254606, 0.00215019, 0.00306377, 0.00134979, 0.00112832, 0.00350681, 0.00253643, 0.00431348, 0.00094915, 0.00150396, 0.00043838, 0.00207101, 0.00301119, 0.00057716, 0.00062709, 0.00543404, 0.00061686, 0.00237189, 0.00522715, 0.00321869, 0.00172645, 0.00244482, 0.00334951, 0.00183201, 0.00038157, 0.0023022 , 0.00418559, 0.00329119, 0.00411452, 0.00089033, 0.00283673, 0.00210368, 0.00222242, 0.00213262, 0.0033576 , 0.00250707, 0.00423595, 0.00237407, 0.00127654, 0.00387341, 0.00216695, 0.00325004, 0.00246333, 0.00396034, 0.0031676 , 0.00354552, 0.00227099, 0.00205363, 0.00128859, 0.00290737, 0.00301655, 0.00319576, 0.00072449, 0.00230528, 0.00326406, 0.00283315, 
0.00338869, 0.00212552, 0.00135612, 0.00250613, 0.00045907, 0.0014009 , 0.00177951, 0.00042544, 0.00073249, 0.00303487, 0.0013664 , 0.00248306, 0.00025601, 0.00435174, 0.00443799, 0.00479944, 0.0009997 , 0.00275155, 0.00286969, 0.00244896, 0.00177604, 0.00278218, 0.00078876, 0.00142078, 0.00186949, 0.0018215 , 0.0027254 , 0.00316367, 0.00192957, 0.00176559, 0.00289111, 0.00048977, 0.00411342, 0.00130383, 0.00250934, 0.00324275, 0.00159243, 0.00334068, 0.00324279, 0.00158259, 0.00041714, 0.00161102, 0.00145149, 0.00222112, 0.00296289, 0.00282892, 0.00123731, 0.00281891, 0.00016613, 0.0014267 , 0.00262089, 0.00367506, 0.00281706, 0.00318947, 0.00090315, 0.00230826, 0.00310803, 0.00889549, 0.00197781, 0.00160006, 0.00307063, 0.00176858, 0.00252353, 0.00141795, 0.00047073, 0.00241224, 0.00165672, 0.00138939, 0.00257068, 0.00148445, 0.00193734, 0.004368 , 0.00247817, 0.00249266, 0.00329317, 0.00078468, 0.00045822, 0.00259324, 0.00298367, 0.00335009, 0.00307879, 0.00325237, 0.00254531, 0.00749495, 0.0026701 , 0.00100689, 0.00184948, 0.00317616, 0.00255977, 0.00112342, 0.00165774, 0.00227449, 0.00064219, 0.00269639, 0.00114312, 0.00203549, 0.00064574, 0.00130932, 0.00304631, 0.00131053, 0.00174587, 0.0027975 , 0.00461148, 0.0015227 , 0.0027072 , 0.00210673, 0.00323388, 0.00028426, 0.00113429, 0.00315131] VAR = [ 3.87111312e-06, 1.29838726e-05, 1.23895436e-05, 5.11051819e-06, 1.87834728e-05, 5.81101229e-05, 1.22431672e-05, 3.14238203e-06, 6.15186426e-06, 1.16054974e-05, 2.61629851e-05, 1.51823678e-05, 3.20501352e-05, 6.75625364e-06, 6.90383937e-06, 7.10772563e-06, 3.93108356e-06, 1.38147699e-05, 9.45390664e-06, 6.18869987e-06, 1.23460353e-03, 5.15741591e-06, 1.27185867e-05, 7.62148434e-05, 9.61369316e-06, 3.59794999e-06, 4.49714597e-05, 1.15313013e-04, 2.51027515e-06, 3.23518027e-06, 1.15175054e-05, 5.55007797e-05, 3.61287015e-06, 4.24901217e-06, 1.57731133e-05, 8.83739880e-06, 4.11832891e-06, 4.51594425e-06, 5.66233716e-06, 2.76312055e-05, 3.10286633e-05, 2.06523833e-04, 4.99679342e-06, 3.59423460e-05, 5.53408014e-06, 5.02979264e-06, 2.29845095e-05, 3.52580303e-06, 4.74110466e-06, 2.77776825e-06, 1.15279947e-05, 4.78634098e-06, 8.24242505e-06, 1.65141090e-05, 1.84669015e-05, 1.65851869e-05, 9.69125917e-07, 4.07269628e-06, 4.79411492e-06, 7.95185399e-06, 6.05491604e-06, 2.30133633e-04, 2.43045915e-06, 9.99138675e-06, 1.61846281e-05, 1.36250194e-06, 3.83900385e-06, 4.03501076e-06, 4.49190746e-06, 2.20133970e-06, 1.40571788e-05, 1.23973871e-05, 1.91642968e-05, 1.83384119e-06, 3.55110501e-06, 6.38707023e-06, 7.58389225e-06, 9.66052931e-06, 1.33459561e-05, 6.01834583e-06, 1.75975058e-06, 9.93625536e-06, 5.57880408e-06, 5.20632392e-06, 2.63891241e-06, 4.96341232e-06, 1.35361419e-05, 5.09588225e-06, 2.13213362e-06, 3.67884149e-06, 4.02580880e-06, 3.36118966e-06, 1.23913905e-05, 1.19327162e-05, 1.33013390e-06, 1.56844681e-05, 5.05235129e-06, 3.27510379e-06, 4.18496352e-06, 1.32615022e-05, 8.00089632e-06, 5.24889508e-06, 7.61725520e-06, 2.45732025e-05, 4.73942392e-06, 3.26874106e-06, 4.19502445e-06, 4.67408597e-06, 4.07529951e-05, 1.85623369e-05, 1.42640177e-06, 9.02420306e-06, 3.99465979e-06, 2.91695819e-06, 7.51525182e-06, 3.28339831e-06, 9.23579413e-06, 8.82938566e-06, 1.67017625e-05, 7.18046179e-06, 6.67502140e-06, 4.53568390e-06, 4.59241197e-06, 9.71055426e-05, 4.06108283e-06, 3.21309715e-06, 2.83145362e-06, 1.30979068e-05, 4.30934096e-06, 1.33494112e-06, 1.23067054e-05, 4.55467345e-06, 4.16151366e-05, 4.39300907e-06, 3.81081336e-06, 3.57599046e-06, 2.44792045e-06, 1.04884156e-06, 5.66646773e-06, 
1.38454953e-05, 7.03958785e-06, 7.96561298e-06, 1.15832827e-06, 5.34098000e-06, 1.08664502e-05, 5.33706713e-06, 1.58029233e-06, 4.16948014e-06, 1.10410603e-05, 3.08923185e-06, 3.60056097e-05, 1.35575315e-05, 7.21297470e-06, 5.46186866e-06, 3.83067878e-06, 4.93382163e-06, 8.74249160e-06, 6.95763983e-06, 8.57639945e-06, 1.99238085e-05, 2.06143616e-05, 4.15158574e-06, 6.98539924e-06, 7.29978665e-07, 1.05324242e-05, 4.03610511e-06, 4.54024757e-06, 1.12380259e-06, 7.25149490e-06, 4.68609708e-06, 4.47583007e-05, 5.73128000e-06, 1.55383559e-06, 6.10201277e-06, 1.56226083e-05, 2.07417481e-06, 3.92362694e-06, 5.07511158e-06, 1.91527526e-05, 1.23196439e-06, 2.78105795e-06, 6.20886459e-06, 9.77619759e-06, 4.54569998e-05, 3.69801329e-06, 3.90055801e-06, 8.95043365e-06, 4.62714915e-06, 8.59072207e-06, 7.93476416e-06, 2.94461267e-05, 1.27513460e-05, 6.37168538e-06, 1.42869302e-06, 3.88169829e-06, 3.73479924e-06, 3.41961106e-05, 5.99249536e-06, 3.52894229e-05, 3.60535269e-06, 1.97432492e-06, 1.08726206e-06, 6.34745318e-06, 1.85853697e-06, 4.88355657e-06, 1.45421337e-05, 4.71209759e-05, 9.75886239e-06, 1.92188254e-06, 2.44175182e-05, 6.48665880e-06, 3.77833988e-06, 4.94021824e-06, 1.11375076e-05, 2.48913056e-06, 7.50221434e-06, 7.71706724e-06, 4.40449246e-06, 5.01260110e-06, 7.55913298e-06, 9.61114153e-06, 4.71524238e-06, 5.71612330e-06, 5.35067657e-06, 1.24371020e-06, 1.05315411e-06, 3.93981671e-06, 4.10917913e-06, 4.50131192e-06, 1.41029887e-06, 5.21404239e-06, 3.10300539e-05, 2.86295992e-06, 3.14574375e-05, 4.13089781e-06, 3.94511845e-06, 5.21837923e-06, 1.86040011e-06, 4.33877122e-06, 6.79169351e-06, 7.34233345e-06, 2.46684357e-05, 6.04518227e-06, 3.50075336e-06, 1.22008735e-06, 3.82670787e-06, 1.29928488e-05, 1.30317263e-06, 1.82923403e-06, 1.68159694e-04, 1.39570985e-06, 6.82018782e-06, 2.77705938e-05, 5.50219803e-06, 6.94297855e-06, 5.56691651e-06, 4.40913139e-05, 8.64954832e-06, 1.13623461e-06, 3.91895303e-06, 2.90528320e-05, 8.95829181e-06, 2.13802762e-05, 1.45383845e-06, 2.19748855e-05, 2.92403666e-06, 4.11580346e-06, 3.79422424e-06, 1.01354981e-05, 1.12666398e-05, 2.12954971e-05, 4.73278161e-06, 2.26826965e-06, 2.45301255e-05, 5.86185180e-06, 6.92235736e-06, 8.42678526e-06, 2.47795958e-05, 6.25412728e-06, 1.41974527e-05, 3.95337688e-06, 7.16912125e-06, 2.00884144e-06, 2.00349034e-05, 5.97662651e-06, 3.01450892e-05, 4.63002816e-06, 4.09857661e-06, 1.23373959e-05, 5.62286236e-06, 1.23868932e-05, 7.79128188e-06, 4.02737664e-06, 4.26867074e-06, 1.30633550e-06, 2.16092242e-06, 2.53344988e-06, 1.55130629e-06, 1.20587686e-06, 8.47719131e-06, 1.72865161e-06, 8.85885938e-06, 1.36250583e-06, 3.02467214e-05, 2.85941868e-05, 1.68684969e-05, 2.17024274e-06, 9.09429716e-06, 1.12517072e-05, 5.39997088e-06, 3.16738113e-06, 7.44227101e-06, 1.39521345e-06, 1.80325624e-06, 3.23437991e-06, 4.12906812e-06, 6.51981136e-06, 7.28606378e-06, 4.44469608e-06, 4.00705337e-06, 1.34244753e-05, 1.34953189e-06, 3.86701616e-05, 4.30733919e-06, 4.29618197e-06, 1.67568650e-05, 5.39451612e-06, 8.50733433e-06, 1.04900918e-05, 4.68246794e-06, 2.92591087e-06, 2.54589900e-06, 6.68970689e-06, 3.68698856e-06, 5.70542637e-06, 1.57329410e-05, 3.45199222e-06, 7.27799975e-06, 8.64176250e-07, 5.59882582e-06, 4.16052401e-06, 1.73753080e-05, 7.85748797e-06, 6.46626446e-06, 2.23241624e-06, 6.79217908e-06, 6.18545939e-06, 5.41203600e-04, 2.75355566e-06, 5.01654998e-06, 9.55004050e-06, 3.36241075e-06, 4.95540827e-06, 4.38650100e-06, 2.19975452e-06, 4.99878215e-06, 2.08615031e-06, 6.57349770e-06, 6.07825138e-06, 1.82116637e-05, 3.98356104e-06, 
3.02862803e-05, 1.45275531e-05, 1.80111343e-05, 1.81263109e-05, 1.37630960e-06, 1.01588605e-06, 1.09961427e-05, 7.09189456e-06, 8.63553483e-06, 1.28377215e-05, 1.15539997e-05, 4.30247032e-06, 3.69651334e-05, 1.13411365e-05, 1.43191945e-06, 2.76733205e-06, 7.03730009e-06, 4.93027252e-06, 2.72768641e-06, 3.15867713e-06, 3.51786262e-06, 1.33668414e-06, 5.15268762e-06, 2.24808552e-06, 3.91888753e-06, 1.96848802e-06, 5.96948656e-06, 6.72807533e-06, 2.52024742e-06, 4.64795350e-06, 6.00152269e-06, 4.42994740e-05, 2.59223022e-06, 4.76032620e-06, 3.15249648e-06, 1.02942457e-05, 7.54992395e-07, 2.48130225e-06, 5.97253972e-06]

    x_original = np.array(x_original)
    x_original -= MEAN
    x_original /= VAR

    def extend_x(arr, additions=True, extension=True):
        if extension:
            x.extend(arr)
        if additions:
            x.append(scipy.std(arr))
            x.append(scipy.var(arr))
            x.append(sum(arr) / len(arr))
            x.append(sum(np.abs(arr)) / len(arr))
            x.append(min(arr))
            x.append(max(arr))
            x.append(scipy.mean(arr))
            x.append(scipy.median(arr))

    x = []
    extend_x(x_original)
    extend_x(np.abs(x_original))
    # extend_x(np.sqrt(np.abs(x_original)))

    # sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50, random_state=1)
    # zzz1 = sampler1.fit_transform(np.abs(np.array(orig)))[0]
    # sampler2 = SkewedChi2Sampler(skewedness=8.5, n_components=50, random_state=1)
    # zzz2 = sampler2.fit_transform(np.abs(np.array(x)))[0]
    sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
    zzz3 = sampler3.fit_transform(np.array(x))[0]
    # zzz1/zzz2 come from the commented-out samplers above, so using them
    # would raise NameError; the two calls are disabled to match
    # extend_x(list(zzz1))
    # extend_x(list(zzz2))
    extend_x(list(zzz3))

    if make_np:
        return np.array(x)
    return x
def transform(x_original, make_np=True): orig = x_original variances_str = "0.0021246993507595866 0.0032713784391997795 0.0033522806931598247 0.0017432450192796278 0.0034743692038798537 0.003637888546929857 0.0019210039127597624 0.0021841610994196136 0.0018762718393396005 0.0034590054363498003 0.0052604099446999682 0.004508790286140099 0.0035272400244497799 0.0030404807453598324 0.0022447918038096385 0.0017851536926196112 0.0021643550482296344 0.0037976255097098874 0.0025753731081197833 0.0029230906247597055 0.0060828219621099217 0.0023575999971396813 0.0043864294801700945 0.0071589655821691772 0.0036986840015399082 0.00057556662468004468 0.0030184163825898096 0.0062797556933995476 0.0018388575003994976 0.0018222650139394971 0.0032805952842698042 0.0035132540814598752 0.0024659598304896477 0.0026319448493497136 0.003572205969799843 0.0030648003435798008 0.0021365654833496528 0.0012356635529695108 0.0021261889005796605 0.0030134591283298012 0.0016100815367798148 0.012523000339860027 0.002519218599329652 0.0052571679389798714 0.0026606913287896975 0.0028296754183797139 0.0039323969569099605 0.0020691205227195992 0.0030826525382697508 0.0020232189983895653 0.0040679867872599708 0.0018371556472196301 0.0031808009477497599 0.0034889724135098699 0.0041241983089198644 0.003466312111199805 0.00070525738208999413 0.0012962120699994075 0.0023748498468496439 0.0039468429845199238 0.0024428431670496745 0.012215355168679928 0.0012535008249493743 0.0026764566235297597 0.0043243784063398552 0.00065200872076008631 0.0022265717804095869 0.00081018893256987797 0.0027757838127496974 0.0011937874021293784 0.0033124457059298595 0.0033779817461398022 0.0026583629339898352 0.00096654598538961438 0.0021773139189896237 0.002624655562289701 0.0015705430665195477 0.0030252402714297136 0.0040940954038199478 0.0027594978981697318 0.00079096095234988185 0.0026036506797997572 0.0027190828795197546 0.0027920414767097406 0.0018699793252895133 0.002401434445989645 0.0031948320317497989 0.0028928477797297309 0.001254727068959367 0.0022096979193596154 0.0021709718136396741 0.0022725767293796106 0.0036734258169697923 0.0028088068982497589 0.00058128786511008252 0.0030860261422598389 0.0028005311404197221 0.0013144850578592786 0.00075680244248994735 0.003594669478579891 0.0032807255223097792 0.0023280524667396774 0.00318162350717981 0.0038591178877899067 0.0027019215482496691 0.00097254474824969451 0.0023080437106096615 0.0013613457456093684 0.0045951612643399054 0.0038485342457099387 0.00043219164003003777 0.001528150938759669 0.0024822021413396867 0.0018061700621995042 0.0028432498431096936 0.00055539853847006056 0.004173783897349969 0.0023134058954397316 0.0035923805665898493 0.001944158411359583 0.0022174885522996423 0.0021200232347196586 0.0014086675440495285 0.0060588732600395838 0.0020999206563196006 0.002311535350179601 0.0012081675861494046 0.0029662122591298679 0.0023064668532896651 0.00086526146860972403 0.0035453290259598483 0.0022721631862096265 0.003677016888759915 0.0027193153269396897 0.0019698620481495626 0.002072663196939612 0.0014700221401894075 0.00017158202360999703 0.0022463464680696336 0.0035194326419099174 0.0030686680423197867 0.002374867405639663 0.00042710055163003362 0.0030035550561797468 0.0037270432987298683 0.0024282900953096712 0.00086048954793975898 0.0011186456857096038 0.0027912283038996942 0.0013746583237494142 0.0043072999357398533 0.0032034503423598666 0.0025760441755196838 0.0023421858856196836 0.002131599313139612 0.0029099423010796777 0.0016998768135196812 0.0028229397603697181 
0.0030535556897598208 0.003180828002529861 0.0041489816552998261 0.00056885910910004086 0.0030288286590998306 0.0002859839918500021 0.0025907458249397565 0.0019840401991995621 0.0026709580203396733 0.00018365706286999837 0.0023102737736697076 0.0019214511389595858 0.0035872736249698512 0.0030397738456597189 0.00058895044087008347 0.0011302665188195724 0.0036135427626998772 0.00096930456685965713 0.0018706273234795688 0.0028471338214996859 0.0040263350593498478 0.00041504772780003257 0.00088363138039978097 0.0027967429290597077 0.0021579785680196756 0.0032100556617598404 0.0012821952431594156 0.0011697489935395071 0.0024514963691797428 0.0024098468797296444 0.0035879574826698079 0.003169685177989759 0.0053416716965498916 0.0031958328667698248 0.003017081933489743 0.00025151594039000199 0.0022886760678696417 0.0022956408480896266 0.0041254424031998971 0.0023694221563096735 0.0047916681473398276 0.00051616048678002784 0.0009364954557196728 0.00036740167022002141 0.00078959433233993142 0.0007410161818699483 0.0028233597298397656 0.0035765694441198263 0.0057271246152496317 0.0031925037529198339 0.0014168537242193022 0.0040282638127298667 0.0027408330144697043 0.0022817147531596685 0.0013110019340695283 0.0031049698000498423 0.0014794847673093696 0.0019060075812395761 0.0023860511557697102 0.0025873434738996485 0.0018797211826496064 0.0022561836261797042 0.0041991871207300085 0.0023698767044296855 0.0022702994190196093 0.0029535219055797368 0.00079702808800984168 0.00017141315798999718 0.00071072931258999632 0.0022027503444296218 0.0023522028982396696 0.00034261818457001714 0.0030124186968896794 0.0040563730303498731 0.0018014168708095377 0.0045389503904098493 0.0025631905209596659 0.0021709940360196437 0.0031014667275497628 0.0013724805472092871 0.0011206960384995625 0.0035493743115597959 0.0025190975770797062 0.0042803605014598489 0.0009058812431398496 0.0015261301214595528 0.00043206103726003953 0.002057161621769605 0.0029676093005998037 0.00059245340563008307 0.00060303803797007931 0.0055309290333298089 0.0006063130560400961 0.0024001375326397033 0.0051570050648799921 0.003216390780179791 0.001684353076369542 0.0024379539857596923 0.0033096221900098537 0.001808786421229587 0.00036056760674001951 0.0022999638755596282 0.0042300521607298008 0.0033374014801298532 0.0041061077925497727 0.00090300806356967953 0.0027771389140698217 0.0020966763969595594 0.0022364965134396191 0.0021630341014396426 0.003362866027789783 0.0025631540862897312 0.004191926116449857 0.0023811464991296992 0.0013004726735392649 0.0038548863857898333 0.0021571933421396868 0.0032544925816697214 0.0024967996225797357 0.0039128733433798774 0.0032033546653597454 0.0035349716580698469 0.0022774309789496266 0.0020827816616296431 0.001280163236199224 0.0029566993924298487 0.0030278382394197082 0.0031458574724698 0.00073484413224997748 0.0023053594018396508 0.0032629178035998552 0.0028317322999097433 0.0033847674035998084 0.0021507182045496622 0.0013635142890994728 0.0025417732184397166 0.00046798538031004748 0.0014196850140693168 0.001775496716359453 0.00041823802366003517 0.00072539019745996431 0.0030393665008997704 0.0013936213581092793 0.0024650105378997201 0.0002448311107500009 0.0043274930097698871 0.0045290280761799487 0.0047295668273101684 0.0010168427077595955 0.0027754963934396339 0.0028934546900597821 0.0024947583902996968 0.0017947966152195337 0.002808371739829744 0.00080562592018981933 0.0014184058297892733 0.0018558152750695453 0.0018534208896895739 0.0027403346575797425 0.0031581041628497997 0.0019250669095596151 
0.0017553527272695774 0.002912743471719791 0.00051881062016005577 0.0041509390442198381 0.0013269250644194269 0.002515913493569724 0.0032034703723998357 0.0015867479873494805 0.0033147417203898185 0.0032343107633697474 0.0016084849715195411 0.00041333437351003248 0.0015982072633194113 0.0014028860576195891 0.0022158183125796393 0.0029487353931697447 0.0028615529172198303 0.0012540566466694289 0.0028261495420197243 0.00017822631116999813 0.0014531231202394163 0.0025906615127396855 0.0036318312786498171 0.002825987395589701 0.0032132990932597881 0.00093148496318973544 0.0022986618991797251 0.0031201742482197584 0.0088757592945090114 0.0019739854059195429 0.0015964743898695729 0.0030620168350797899 0.0017549143672195243 0.0025403744949397296 0.0013998610671793503 0.00050658872377004334 0.0024219329259397276 0.0016578000335194041 0.0014255931402395057 0.0025947821308797258 0.0015455710208097471 0.0019424337106196282 0.0043638276133198444 0.0024791513534598046 0.002471546965979776 0.0032594199180097532 0.00081732890395981583 0.00047798563291005168 0.0026265644132597047 0.0029957660721997665 0.0033466747844698567 0.0030434931783497998 0.0032186603864098446 0.0025580746428896777 0.0074381240438289309 0.0026177068932397522 0.0010374525766094667 0.0018484145568895259 0.0032105816832397539 0.0025588880273796702 0.0011027058149395553 0.00165028316301944 0.0022621210840096185 0.00063843135713010388 0.002677249425599694 0.0011529594838495104 0.0020757956716295806 0.00063164132836008679 0.0012984328854694727 0.0030668599805997697 0.0013209850432293402 0.0017350537225995246 0.0027999960618096992 0.0045968238896799086 0.0015396509469794125 0.0026842448170297231 0.0020969214423495791 0.0032249556936598013 0.00029111348006000424 0.0011551860431694666 0.0031812251568797824"""
    variances = np.array(variances_str.split(), dtype=float)

    # Per-feature means, precomputed offline on the training data.
    means_str = """3.8753948237858108e-06 1.2972946111794674e-05 1.2594051521366083e-05 5.0841523278404734e-06 1.8774317409263048e-05 6.2913210996917487e-05 1.269807222669888e-05 3.2193349475262057e-06 6.5226200570272061e-06 1.1473588836338628e-05 2.7180466935587737e-05 1.4762302565458717e-05 3.3722317512532468e-05 6.8216505240041436e-06 7.1028116499628903e-06 6.5493827073439618e-06 3.80367131264172e-06 1.4028847130371071e-05 9.3773632055309283e-06 6.493323349342037e-06 0.0012533506935897218 4.9911335763841195e-06 1.2793399333055094e-05 7.251611930188133e-05 9.5489822043414659e-06 3.8895300628186868e-06 4.173457402556971e-05 0.00011347419063456421 2.5715278760111459e-06 3.2518257183024889e-06 1.1746203655396577e-05 5.564016383146592e-05 3.6296631509353909e-06 4.3289811407316681e-06 1.6025500646546836e-05 8.7246747361516438e-06 4.2410327327645271e-06 4.3732089713098806e-06 5.9073865563619062e-06 2.4944097977347468e-05 2.6986158170267078e-05 0.00019357426874984057 5.1764074423215301e-06 3.5213588425492417e-05 5.6548098935816624e-06 4.9935937088475483e-06 2.3828362907972465e-05 3.521023866293484e-06 4.9870702736337188e-06 2.7658266039366798e-06 1.1424139609302174e-05 4.6380793952958809e-06 8.1857174384998292e-06 1.6642225648910047e-05 1.8268643132929127e-05 1.5473118685259949e-05 9.7616078787441458e-07 4.1097607144367696e-06 5.0459663323074957e-06 8.1752036387080678e-06 6.2517426726346483e-06 0.00021128251533625498 2.4441154311918049e-06 1.0193291769369655e-05 1.6000078417860217e-05 1.3360615760691735e-06 3.9318274983244583e-06 3.7424801978201094e-06 4.5859948912655592e-06 2.1863893895928264e-06 1.4465960374765088e-05
1.226800721873276e-05 1.8464105024954982e-05 1.6648636202068534e-06 3.6936226607579947e-06 6.5624020308052344e-06 8.1339303452353934e-06 9.5047711128428641e-06 1.4246167594118415e-05 6.0140973294197884e-06 1.8256200156735017e-06 1.0903757639504039e-05 5.5080914174679564e-06 5.2142169736994904e-06 2.6292604236996645e-06 4.9623024158512934e-06 1.3171420269231491e-05 5.1064782563443342e-06 2.2201233797532346e-06 3.5523146873797785e-06 4.0447453033151591e-06 3.4393314844283629e-06 1.2283374778942664e-05 1.2292876875817127e-05 1.3500473667799135e-06 1.5982740863426082e-05 5.1149263226338105e-06 3.6545265412690049e-06 4.4324293930103502e-06 1.3464151551507424e-05 8.2607323905827565e-06 5.3487969307959027e-06 7.699747933440781e-06 2.6028092053793074e-05 4.6160336251911396e-06 3.4679078250202434e-06 4.1733322591036512e-06 5.3685295356327671e-06 4.1690461279070458e-05 1.8175584863744415e-05 1.4529974714941822e-06 8.9646541680474962e-06 3.8638936584656166e-06 2.9622882868516527e-06 7.0496709821419259e-06 3.1582263769680431e-06 9.405912339046591e-06 9.0755581225100531e-06 1.6325319116706371e-05 7.4249528783198223e-06 6.4142049677004635e-06 4.5308256388559377e-06 4.3379101302365048e-06 0.00010082767573262403 3.8073220474859233e-06 3.2462395975613701e-06 2.7311928376618711e-06 1.3798802536934602e-05 4.3141812822167945e-06 1.3418830948478911e-06 1.2429912124862659e-05 4.5075176921294976e-06 5.1646366657811792e-05 4.5044907401523191e-06 3.8984503442526084e-06 3.5443432542494581e-06 2.4525978397502771e-06 9.3143290305042167e-07 5.5977615024444758e-06 1.4190797086073543e-05 6.9561233764939789e-06 8.0114861452901582e-06 1.2454920191746878e-06 5.5587154982870272e-06 1.0799672251505274e-05 5.2959102834492533e-06 1.5685688647449261e-06 4.0529428722210623e-06 1.1678512895855624e-05 3.2192802988981066e-06 3.7209970472627806e-05 1.3342539819491425e-05 7.8622903069455567e-06 5.2192321914900928e-06 3.9052134579505441e-06 5.0680769571043553e-06 7.9552828837898563e-06 6.7762118492538826e-06 8.5875102642240075e-06 2.0992545616427373e-05 2.0487505271743291e-05 4.3745997535029968e-06 7.1046977878669946e-06 7.7167495498190023e-07 1.0141932308464567e-05 4.2219873766408028e-06 4.5710190852658248e-06 1.1970402654479661e-06 7.6102614732724262e-06 4.6239298630603015e-06 4.9995946799371758e-05 5.7956634809724437e-06 1.5024720589152287e-06 6.0635032731039673e-06 1.5391627780641011e-05 2.178652052162647e-06 4.2030056647134055e-06 5.0822379579415565e-06 1.9836303495641017e-05 1.8930994307717652e-06 3.0604158961858623e-06 6.5280625603827021e-06 1.0265727137904331e-05 4.0302422231094213e-05 3.7750836192671517e-06 4.0367914908354297e-06 8.0446362665366717e-06 4.7656248380853414e-06 8.6978972436276061e-06 7.9679700766206762e-06 2.9451374286812033e-05 1.3111273739035649e-05 6.6028118897700181e-06 1.4941804584231896e-06 3.9528326512917906e-06 3.847295383196301e-06 3.5756600152130488e-05 6.10565382283349e-06 3.5891435340776665e-05 3.6066217532076844e-06 2.0888559126779404e-06 1.0755002920858641e-06 5.8998610038911923e-06 1.9512692088167549e-06 5.0713400804749472e-06 1.4585512351101608e-05 4.822908984311966e-05 9.8016096252778945e-06 1.9911328814957375e-06 2.4764204976600043e-05 6.4805250037636707e-06 3.7935478658080509e-06 5.1083549212952252e-06 1.1189053457458745e-05 2.5200508287861594e-06 7.8373349366817099e-06 7.9847294470099685e-06 4.3095275213819756e-06 5.0268163315597379e-06 7.4832981742681862e-06 9.2408501776852945e-06 4.784135850487231e-06 5.6532252724841891e-06 5.3930817570733614e-06 1.2687973462442569e-06 
1.0372124095824449e-06 4.1435096417718113e-06 3.9981959056867675e-06 4.3520178967986713e-06 1.4659748060826231e-06 5.3366902864809163e-06 3.1416765924193689e-05 2.9749844077512922e-06 3.1381515491784522e-05 4.3260417959669591e-06 4.197030498717592e-06 5.306570430382929e-06 1.8883854746421685e-06 4.1937519548496871e-06 6.9194038197555032e-06 7.1767073252994241e-06 2.4833484439498967e-05 5.8383610252210572e-06 3.6243330253608428e-06 1.18902799300137e-06 3.8963636200265115e-06 1.2883918919165478e-05 1.3605525456692033e-06 1.7407965336936251e-06 0.00016857768522088627 1.4100686994311071e-06 7.1668903489840609e-06 2.7108318215380169e-05 5.4590558436375845e-06 6.2033647867466643e-06 5.6859033955868132e-06 4.3241078188076546e-05 9.0432098151017242e-06 1.0594888618529579e-06 4.0484870451845699e-06 2.9548153849811755e-05 9.410471996079331e-06 2.1009809505791367e-05 1.4939978216919125e-06 2.1026313371938338e-05 2.912760631269843e-06 4.1130865661336849e-06 4.0964425120045752e-06 1.0334704132812778e-05 1.1639088987295558e-05 2.0866544215744135e-05 4.7665503013673322e-06 2.4282885077105844e-06 2.4696946110127049e-05 5.8943453758772547e-06 7.0559765519393299e-06 8.6495232917104309e-06 2.4674527585132413e-05 6.5466440985476235e-06 1.4291488938382783e-05 4.0363838996778781e-06 7.5171096440058871e-06 1.7659216070078882e-06 2.3552682868282767e-05 6.0075484731317116e-06 2.9678689121826856e-05 4.5688281985000224e-06 4.2587818969459276e-06 1.2282850125910679e-05 5.6981633973611215e-06 1.2193919548692016e-05 7.7909581862542261e-06 4.1995999932004883e-06 4.2310001927379966e-06 1.4034983645177226e-06 2.2253775626039904e-06 2.5484625453534006e-06 1.5024773624760737e-06 1.1886813960082901e-06 8.943485028332714e-06 1.802211533446878e-06 8.7804607030574995e-06 1.4714171056899874e-06 3.039182778117474e-05 2.9469599285561173e-05 1.6190782721728404e-05 2.1980748656966054e-06 9.1492500963304843e-06 1.3139192984142854e-05 5.5841754669416901e-06 3.2663084979403296e-06 7.8300182408015622e-06 1.4650747681293603e-06 1.8418132244867557e-06 3.1634249051793445e-06 4.2879811205541378e-06 6.821776038991282e-06 7.2547994800721606e-06 4.5762861000866325e-06 4.0033741553487421e-06 1.3944663969273685e-05 1.5205123797572826e-06 3.7950333845819879e-05 4.6914603422440762e-06 4.3642212832058213e-06 1.6888537402380868e-05 5.3097299301474431e-06 8.5974973592752354e-06 1.0183715675617148e-05 4.4233012671049924e-06 2.8020268713604479e-06 2.4903519176724564e-06 6.0367933560789913e-06 3.6066482258866671e-06 5.5465358638439433e-06 1.6406145480373579e-05 3.6475034103942783e-06 7.545922378704344e-06 1.0510117913470496e-06 6.383917613657175e-06 4.1469930879045612e-06 1.5104979761103841e-05 7.9249357960338965e-06 6.6303162793237734e-06 2.3058946881412919e-06 6.93384276789908e-06 6.2217404008410318e-06 0.00053927751612010478 2.7688222907463807e-06 5.1593082062395665e-06 9.4327080926443393e-06 3.3336519843947502e-06 5.1198323130590842e-06 4.5094438342118166e-06 2.6237274608190453e-06 5.1693775448212788e-06 2.1082108591551617e-06 6.8329929120308474e-06 6.2018823452726071e-06 2.1240415994925091e-05 4.0243827456115514e-06 3.0522891049621393e-05 1.4011920974680818e-05 1.7239640547533074e-05 1.7993086639426091e-05 1.4355334226673438e-06 1.1012319919274514e-06 1.0614538321433708e-05 7.2890435254277739e-06 8.5872764781091643e-06 1.3084966706891505e-05 1.1094006709484758e-05 4.2456925142930984e-06 3.6872244517667462e-05 1.0859154502284048e-05 1.5319903891298572e-06 2.7727900163087534e-06 7.2483213769211959e-06 5.1159362377455894e-06 2.6822480525986132e-06 
2.889767166323531e-06 3.55288675821463e-06 1.3380456162305463e-06 5.2278105015869388e-06 2.3031150972921671e-06 4.1508531796520333e-06 1.8528326040776206e-06 6.3815646996712558e-06 6.9338240811962186e-06 2.5793558575700516e-06 4.3737400474318956e-06 6.0837447954729297e-06 4.7903414469400619e-05 2.8013740155544375e-06 4.7622560053896967e-06 3.250652556381526e-06 9.5664014501971676e-06 8.2542503434926804e-07 2.5912870572853299e-06 6.0526418572379129e-06"""
    means = np.array(means_str.split(), dtype=float)

    # Standardise the input with the precomputed statistics. Note that the
    # pipeline divides by the variances, not the standard deviations.
    x_original = np.array(x_original, dtype=float)
    x_original -= means
    x_original /= variances
    # A feature-selection experiment (dropping features ranked by
    # features_ordered_by_importance2) was also tried here and disabled.

    x = []

    # Small helpers; most are used only by the disabled experiments below.
    def sqr(v):
        return v * v

    def sqr3(v):
        return v * v * v

    def e_pow(v):
        return math.exp(v)

    def me_pow(v):
        return math.exp(-v)

    def fred(v):
        return round(math.fabs(v) * 1000)

    def extend_x(arr, additions=True, extension=True):
        # Optionally append the raw values, then a block of summary statistics.
        if extension:
            x.extend(arr)
        if additions:
            x.append(np.std(arr))
            x.append(np.var(arr))
            x.append(sum(arr) / len(arr))
            x.append(sum(np.abs(arr)) / len(arr))
            x.append(min(arr))
            x.append(max(arr))
            x.append(np.mean(arr))
            x.append(np.median(arr))

    def count_smaller_ratio(arr, delta):
        # Fraction of entries that are <= delta.
        return sum(1 if el <= delta else 0 for el in arr) / len(arr)

    # Active feature set: standardised values plus two nonlinear views.
    extend_x(x_original)
    extend_x(np.sqrt(np.abs(x_original)))
    extend_x(np.abs(x_original))

    if False:  # disabled experiment: random-feature expansions on top
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))
        sampler1 = SkewedChi2Sampler(skewedness=0.022, n_components=50,
                                     random_state=1)
        # reshape(1, -1): the samplers expect a 2-D array of samples.
        zzz1 = sampler1.fit_transform(np.array(orig).reshape(1, -1))[0]
        sampler3 = RBFSampler(gamma=0.0025, random_state=2, n_components=20)
        zzz3 = sampler3.fit_transform(np.array(x).reshape(1, -1))[0]
        x = []
        extend_x(x_original)
        extend_x(list(zzz1))
        extend_x(list(zzz3))

    if False:  # disabled experiment: threshold-ratio features
        extend_x(x_original)
        extend_x(np.sqrt(np.abs(x_original)))
        extend_x(np.abs(x_original))
        x.append(count_smaller_ratio(x_original, -1.0))
        # Ratios at thresholds from -0.9 through 1.0, and a number of other
        # transforms (expm1, square, tanh, cos, exp(-x), sign, sin, log,
        # cubes, fourth roots), were also tried and disabled.

    if make_np:
        return np.array(x)
    return x
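A short side note with a minimal sketch (an assumption, not part of the snippet above): RBFSampler draws its random projection when fit() is called, so the usual pattern is to fit the sampler once on the training data and then reuse transform() on held-out data, keeping both in the same randomized feature space. The shapes and parameters below are illustrative placeholders.

import numpy as np
from sklearn.kernel_approximation import RBFSampler

rng = np.random.RandomState(0)
X_train = rng.randn(100, 20)  # illustrative shapes
X_test = rng.randn(30, 20)

rff = RBFSampler(gamma=0.5, n_components=200, random_state=1)
Z_train = rff.fit_transform(X_train)  # fit() draws the random weights and offsets
Z_test = rff.transform(X_test)        # transform() reuses the same draw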
# K(x, y) = exp(-gamma * ||x - y||^2)
# sigma = sqrt(1 / (2 * gamma))  <=>  gamma = 1 / (2 * sigma^2)
import time

import numpy as np
import sklearn.metrics.pairwise
from sklearn.kernel_approximation import Nystroem, RBFSampler

num_of_samples = 14000
X = np.random.random((num_of_samples, 5))
sampling_percentage = 0.05
n_components = int(num_of_samples * sampling_percentage)

# Random Fourier features (RBFSampler).
start_time = time.time()
RFF = RBFSampler(gamma=1, n_components=n_components)
V = RFF.fit_transform(X)
RFF_estimated_kernel = V.dot(V.T)
print("--- RFF Time : %s seconds ---" % (time.time() - start_time))

# Nystroem approximation with the same number of components.
start_time = time.time()
N = Nystroem(gamma=1, n_components=n_components)
V = N.fit_transform(X)
estimated_kernel = V.dot(V.T)
print("--- Nystrom Time : %s seconds ---" % (time.time() - start_time))

# Exact RBF kernel for reference.
start_time = time.time()
real_kernel = sklearn.metrics.pairwise.rbf_kernel(X, gamma=1)
print("--- Exact Time : %s seconds ---" % (time.time() - start_time))
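A natural follow-up to the timing comparison, sketched here as an addition rather than part of the original benchmark: check how close each estimate is to the exact kernel. The relative Frobenius errors below reuse the matrices computed in the snippet above.

# Assumes real_kernel, RFF_estimated_kernel and estimated_kernel from the
# benchmark above are still in scope.
rff_err = np.linalg.norm(real_kernel - RFF_estimated_kernel) / np.linalg.norm(real_kernel)
nys_err = np.linalg.norm(real_kernel - estimated_kernel) / np.linalg.norm(real_kernel)
print("--- RFF relative error      : %s ---" % rff_err)
print("--- Nystroem relative error : %s ---" % nys_err)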